diff --git a/onebench/codegeex/README.md b/onebench/codegeex/README.md new file mode 100644 index 0000000..399eb25 --- /dev/null +++ b/onebench/codegeex/README.md @@ -0,0 +1,16 @@ +使用说明: + +- 在A100机器上运行 +- 在正常terminal中运行bash initialize_docker.sh +- 在打开的container bash中运行 bash original_script.sh +- 确保environment.yml文件存在 + +示例输出,保存在results_table.md中: + +| L | OneFlow[Mem(MiB)/Time(s)] | PyTorch[Mem(MiB)/Time(s)] | fastertransformer[Mem(MiB)/Time(s)] | +| --- | --- | --- | --- | +| 128 | 25687/0.039 | 26137/0.056 | 26892/2.832 | +| 256 | 25987/3.035 | 26231/4.364 | 26892/5.421 | +| 512 | 26707/9.158 | 27194/9.934 | 26892/11.236 | +| 1024 | 27763/21.968 | 28654/24.382 | 28932/25.541 | +| 2048 | 33093/50.033 | 34028/58.842 | 30294/56.203 | \ No newline at end of file diff --git a/onebench/codegeex/environment.yml b/onebench/codegeex/environment.yml new file mode 100644 index 0000000..751fe01 --- /dev/null +++ b/onebench/codegeex/environment.yml @@ -0,0 +1,82 @@ +name: py37 +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2023.01.10=h06a4308_0 + - certifi=2022.12.7=py37h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.4.2=h6a678d5_6 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1s=h7f8727e_0 + - pip=22.3.1=py37h06a4308_0 + - python=3.7.16=h7a1cb2a_0 + - readline=8.2=h5eee18b_0 + - setuptools=65.6.3=py37h06a4308_0 + - sqlite=3.40.1=h5082296_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.37.1=pyhd3eb1b0_0 + - xz=5.2.10=h5eee18b_1 + - zlib=1.2.13=h5eee18b_0 + - pip: + - backcall==0.2.0 + - charset-normalizer==3.0.1 + - cpm-kernels==1.0.11 + - decorator==5.1.1 + - deepspeed==0.8.0 + - filelock==3.9.0 + - fire==0.5.0 + - hjson==3.1.0 + - huggingface-hub==0.12.0 + - idna==3.4 + - importlib-metadata==6.0.0 + - ipython==7.34.0 + - jedi==0.18.2 + - markdown-it-py==2.1.0 + - matplotlib-inline==0.1.6 + - mdurl==0.1.2 + - 
"""Summarise CodeGeeX benchmark logs into a Markdown comparison table.

For every output length and framework, the per-run log files
``<logs dir>/<length>_<framework>_run_<i>.log`` are scanned for the GPU memory
reported by ``nvidia-smi`` and for the generation time, the values are averaged
over all runs, and the result is written to a Markdown table.
"""
import argparse
import os
import re

import numpy as np

# One CSV row as printed by
# `nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,
#  utilization.memory,memory.total,memory.free,memory.used --format=csv`.
# The only capture group is the last field, memory.used in MiB.
# NOTE: the GPU name is matched literally, so logs from another GPU model
# will not produce a memory reading.
_MEMORY_RE = re.compile(
    r"\d+/\d+/\d+ \d+:\d+:\d+\.\d+, NVIDIA A100-PCIE-40GB, \d+\.\d+\.\d+, \d+ %, \d+ %, \d+ MiB, \d+ MiB, (\d+) MiB"
)
# Timing line looked for in FasterTransformer logs.
_FT_TIME_RE = re.compile(r"process_code time used (\d+\.\d+)")
# Timing line looked for in OneFlow / PyTorch logs.
_GENERATION_TIME_RE = re.compile(r"Total generation time: (\d+\.\d+)")

DEFAULT_FRAMEWORKS = ("oneflow", "pytorch", "faster_transformer")
DEFAULT_LENGTHS = (128, 256, 512, 1024, 2048)
DEFAULT_NUM_RUNS = 10

# Column titles used in the table header; unknown frameworks fall back to
# their key.
_DISPLAY_NAMES = {
    "oneflow": "OneFlow",
    "pytorch": "PyTorch",
    "faster_transformer": "FasterTransformer",
}


def process_logs(log_files_prefix, num_runs, is_faster_transformer=False):
    """Average GPU memory and generation time over a series of run logs.

    Args:
        log_files_prefix: Path prefix; run ``i`` is read from
            ``f"{log_files_prefix}_{i}.log"`` for ``i`` in ``1..num_runs``.
        num_runs: Number of log files to read.
        is_faster_transformer: Select the FasterTransformer timing line
            (``process_code time used``) instead of ``Total generation time``.

    Returns:
        ``(mean_memory_mib, mean_time_s)``. A run only contributes when both
        the memory row and the timing line are found in its log. When no run
        contributes, both values are ``nan``.

    Raises:
        FileNotFoundError: If one of the expected log files does not exist.
    """
    time_re = _FT_TIME_RE if is_faster_transformer else _GENERATION_TIME_RE
    memory_usage = []
    elapsed_times = []

    for run in range(1, num_runs + 1):
        with open(f"{log_files_prefix}_{run}.log", "r") as f:
            content = f.read()

        mem_match = _MEMORY_RE.search(content)
        time_match = time_re.search(content)
        # Keep the two lists aligned: a run counts only if both values exist.
        if mem_match and time_match:
            memory_usage.append(int(mem_match.group(1)))
            elapsed_times.append(float(time_match.group(1)))

    if not memory_usage:
        # np.mean([]) would also give nan, but with a RuntimeWarning.
        return float("nan"), float("nan")

    return np.mean(memory_usage), np.mean(elapsed_times)


def main(logs_path, framework_list=None, *, lengths=DEFAULT_LENGTHS,
         num_runs=DEFAULT_NUM_RUNS, output_path="results_table.md"):
    """Build the benchmark table, write it to disk and print it.

    Args:
        logs_path: Mapping from framework key to the directory holding that
            framework's log files.
        framework_list: Framework keys, in column order. Defaults to
            ``oneflow``, ``pytorch``, ``faster_transformer``.
        lengths: Output lengths, one table row each.
        num_runs: Number of runs (log files) per length and framework.
        output_path: File the Markdown table is written to.

    Returns:
        The Markdown table as a string.

    Raises:
        KeyError: If a framework in ``framework_list`` has no entry in
            ``logs_path``.
        FileNotFoundError: If an expected log file is missing.
    """
    frameworks = list(DEFAULT_FRAMEWORKS if framework_list is None else framework_list)

    results = {}
    for length in lengths:
        results[length] = {}
        for framework in frameworks:
            log_files_prefix = os.path.join(logs_path[framework], f"{length}_{framework}_run")
            results[length][framework] = process_logs(
                log_files_prefix,
                num_runs,
                is_faster_transformer=(framework == "faster_transformer"),
            )

    titles = [f"{_DISPLAY_NAMES.get(fw, fw)}[Mem(MiB)/Time(s)]" for fw in frameworks]
    lines = [
        "| L | " + " | ".join(titles) + " |\n",
        "| " + " | ".join(["---"] * (len(frameworks) + 1)) + " |\n",
    ]
    for length, framework_results in results.items():
        cells = [
            f"{framework_results[fw][0]:.2f}/{framework_results[fw][1]:.3f}"
            for fw in frameworks
        ]
        lines.append(f"| {length} | " + " | ".join(cells) + " |\n")
    markdown_table = "".join(lines)

    with open(output_path, "w") as f:
        f.write(markdown_table)

    print(markdown_table)
    return markdown_table


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--oneflow_logs_path", type=str, required=True, help="Path to the OneFlow log files")
    parser.add_argument("--pytorch_logs_path", type=str, required=True, help="Path to the PyTorch log files")
    parser.add_argument("--faster_transformer_logs_path", type=str, required=True, help="Path to the FasterTransformer log files")
    args = parser.parse_args()

    logs_path_dict = {
        "oneflow": args.oneflow_logs_path,
        "pytorch": args.pytorch_logs_path,
        "faster_transformer": args.faster_transformer_logs_path,
    }

    main(logs_path_dict)
#!/bin/bash
# Benchmark CodeGeeX-13B inference with PyTorch, OneFlow and FasterTransformer.
#
# For each output length (128..2048) every framework is run 10 times and the
# output of each run is stored in <length>_<framework>_run_<i>.log.
# PyTorch/OneFlow logs are written inside one-codegeex/, FasterTransformer logs
# inside codegeex-fastertransformer/.

# --- Python environment -----------------------------------------------------
conda init bash
source /opt/conda/etc/profile.d/conda.sh
if conda env list | grep -q '^py37\s'; then
    echo "Environment 'py37' exists. Activating it now."
    conda activate py37
else
    echo "Environment 'py37' does not exist. Creating it from 'environment.yml'."
    conda env create -f environment.yml
    conda activate py37
fi
GPU_ID=0

# --- PyTorch / OneFlow benchmark setup --------------------------------------
git clone https://github.com/Oneflow-Inc/one-codegeex.git
cd one-codegeex
python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
pip install -e .
pip install torch
pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu117
pip install cpm_kernels
pip install deepspeed
pip install transformers
pip install xgboost

# Prepend two lines to both test scripts. Each command puts its line at the
# very top, so the final order is "import sys, os" followed by the
# sys.path.insert(...) line.
echo "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))" | cat - tests/test_inference.py > temp && mv temp tests/test_inference.py
echo "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))" | cat - tests/test_inference_oneflow.py > temp && mv temp tests/test_inference_oneflow.py
echo "import sys, os" | cat - tests/test_inference.py > temp && mv temp tests/test_inference.py
echo "import sys, os" | cat - tests/test_inference_oneflow.py > temp && mv temp tests/test_inference_oneflow.py

# Overwrite the model config so it points at the checkpoint in /workspace.
# The heredoc delimiter is quoted, so the content is written verbatim.
cat << 'EOF' > configs/codegeex_13b.sh
# CodeGeeX-13B configuration

CHECKPOINT_PATH="/workspace/codegeex_13b.pt"

MODEL_ARGS="--num-layers 39 \
 --hidden-size 5120 \
 --num-attention-heads 40 \
 --max-position-embeddings 2048 \
 --attention-softmax-in-fp32 \
 --load "$CHECKPOINT_PATH" \
 --layernorm-epsilon 1e-5 \
 --fp16 \
 --ws-encoding-start-id 10 \
 --ws-encoding-length 10 \
 --make-vocab-size-divisible-by 52224 \
 --seq-length 2048"
EOF

# In-place patches of the upstream sources. Several are addressed by absolute
# line number, so they are only valid for the upstream revision this script
# was written against.
# NOTE(review): the state_dict edits presumably skip checkpoint loading in the
# OneFlow test, and the break->pass edit presumably stops generation from
# ending early -- confirm against the upstream files.
sed -i 's|default=39,|default=40,|g' tests/test_inference_oneflow.py
sed -i '129,130s|state_dict.*|pass|g' tests/test_inference_oneflow.py
sed -i '134s|model.load_state_dict(state_dict)|pass|g' tests/test_inference_oneflow.py
# Insert an nvidia-smi query before every "print(times)" line so each run log
# contains one GPU memory row.
# NOTE(review): the whitespace after "i \" and after each "\n" becomes the
# indentation of the inserted Python lines -- confirm it matches the
# indentation of the surrounding code in the target files.
sed -i '/print(times)/i \ import os\n cmd = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"\n os.system(cmd)' tests/test_inference_oneflow.py
sed -i '/print(times)/i \ import os\n cmd = "nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv"\n os.system(cmd)' tests/test_inference.py
sed -i '326s|break|pass|g' codegeex/oneflow/inference.py
# Make the output length the third positional argument of both launch scripts:
# replace the hard-coded 1024 and insert "OUTPUT_LEN=$3" at line 7.
sed -i 's|--out-seq-length 1024|--out-seq-length $OUTPUT_LEN|g' scripts/test_inference_oneflow.sh
sed -i '7i OUTPUT_LEN=$3' scripts/test_inference_oneflow.sh
sed -i 's|--out-seq-length 1024|--out-seq-length $OUTPUT_LEN|g' scripts/test_inference.sh
sed -i '7i OUTPUT_LEN=$3' scripts/test_inference.sh

# --- PyTorch / OneFlow benchmark runs ---------------------------------------
for length in 128 256 512 1024 2048
do
    script_name="test_inference.sh"

    for i in {1..10}
    do
        bash ./scripts/$script_name $GPU_ID ./tests/test_prompt.txt $length 2>&1 | tee ${length}_pytorch_run_${i}.log
    done
    sleep 60
    script_name="test_inference_oneflow.sh"

    for i in {1..10}
    do
        bash ./scripts/$script_name $GPU_ID ./tests/test_prompt.txt $length 2>&1 | tee ${length}_oneflow_run_${i}.log
    done
    sleep 60

done

# --- FasterTransformer benchmark --------------------------------------------
cd ..
WORK_DIR=$(pwd)
git clone https://github.com/CodeGeeX/codegeex-fastertransformer.git

# Build the library and start the API server in the background. The whole
# &&-list is backgrounded, so its "cd" does not change this shell's directory.
# NOTE(review): nothing below waits for the build to finish or the server to
# come up before post.py is first called -- confirm whether post.py retries.
cd codegeex-fastertransformer && \
python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip3 install transformers sentencepiece && \
sh make_all.sh && \
python3 api.py --output_len 2048 --ckpt_path /workspace/codegeex_13b_ft.pt --lib_path /workspace/codegeex-fastertransformer/build/lib/libth_codegeex.so &
# NOTE(review): $! is the PID of the backgrounded list, which may be a wrapper
# subshell rather than api.py itself -- confirm the final kill stops the server.
FLASK_PID=$!

# Enter the client directory exactly once. Doing the "cd" inside the loop (as
# a "cd ... && python3 post.py" list) only works on the first iteration: from
# the second one on the relative path no longer exists, the cd fails and
# post.py is skipped.
cd "$WORK_DIR/codegeex-fastertransformer" || exit 1

for length in 128 256 512 1024 2048
do
    echo "Running for output length: $length"
    for ((i=1; i<=10; i++)); do
        echo "Iteration: $i"
        python3 post.py --output_len $length 2>&1 | tee -a ${length}_faster_transformer_run_${i}.log
        # Append the GPU memory row to the same run log; extract_log.py looks
        # for this row inside the per-run log file.
        nvidia-smi --query-gpu=timestamp,name,driver_version,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv 2>&1 | tee -a ${length}_faster_transformer_run_${i}.log
        echo "------------------------$length--------------------------"
    done
    sleep 20s
done
kill $FLASK_PID