# [Update] Update o1 eval prompt #1491
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity, triggers and run-concurrency policy.
name: pr_run_test

on:
  pull_request:
    # Do not start a run when a pull request only touches these paths.
    paths-ignore:
      - 'README.md'
      - 'README_zh-CN.md'
      - 'docs/**'
      - 'configs/**'
      - 'tools/**'
  # Allows the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # Every day at 22:56 (GitHub evaluates cron schedules in UTC).
    - cron: '56 22 * * *'

# One active run per workflow + ref: a newer run cancels the one that is
# still in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# Workflow-level environment, visible to every step of every job.
# Flag-like values are quoted so they are explicit strings rather than
# YAML ints/bools (environment variables are always strings at runtime).
env:
  # Conda environment activated by the steps of pr_run_test.
  CONDA_ENV: pr_test
  # Offline / ModelScope switches. They are not referenced by any step in this
  # workflow; presumably they are read by the Hugging Face, vLLM and LMDeploy
  # libraries so that models and datasets come from the caches configured
  # below -- TODO confirm.
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  HF_HUB_OFFLINE: '1'
  # Conda installation whose bin/activate script the steps source.
  CONDA_PATH: /fs-computility/llm/qa-llm-cicd/miniconda3
  # Passed to `pip install --cache-dir`.
  PIP_CACHE_PATH: /fs-computility/llm/qa-llm-cicd/.cache/pip
  # Root of the per-run opencompass work directories
  # (<REPORT_ROOT>/<run_id>/regression_resultN).
  REPORT_ROOT: /fs-computility/llm/qa-llm-cicd/eval_report/prtest
  # Cache locations; not referenced by the steps here, presumably consumed by
  # opencompass / huggingface_hub at runtime -- TODO confirm.
  COMPASS_DATA_CACHE: /fs-computility/llm/shared/llmeval/datasets/compass_data_cache
  HUGGINGFACE_HUB_CACHE: /fs-computility/llm/shared/llmeval/models/opencompass_hf_hub
  HF_HUB_CACHE: /fs-computility/llm/shared/llmeval/models/opencompass_hf_hub
jobs:
  # Installs opencompass from the checked-out tree into the conda environment,
  # runs three demo_gsm8k_chat_gen evaluations and fails the job when any of
  # the resulting scores falls outside its expected range.
  pr_run_test:
    runs-on: volc_cu12_local
    environment: 'prod'
    timeout-minutes: 30
    steps:
      - name: Checkout repository
        # NOTE(review): checkout@v2 is an old major version; newer majors need
        # a newer Node runtime on the runner, so verify runner support before
        # bumping.
        uses: actions/checkout@v2
      - name: Prepare - Install opencompass
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          python3 -m pip uninstall opencompass -y
          python3 -m pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
      - name: conda env
        # Diagnostic only: prints the environment list, installed packages and
        # the lmdeploy environment report into the job log.
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
          lmdeploy check_env
      - name: Run test
        # Each run writes to its own work dir under REPORT_ROOT/<run_id>; the
        # "Get result" step reads the summaries back from those directories.
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rm -rf regression_result
          opencompass --models hf_internlm2_5_20b_chat --datasets demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result1 --debug
          opencompass --models hf_internlm2_5_7b_chat --datasets demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result2 --debug --max-num-workers 2
          opencompass --models hf_internlm2_5_7b_chat --datasets demo_gsm8k_chat_gen -a lmdeploy --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result3 --debug --max-num-workers 2
      - name: Get result
        run: |
          # check_score <work-dir> <low> <high>
          # Takes the last comma-separated field of the last line of the
          # summary CSV(s) under <work-dir> as the score and exits 1 unless
          # its integer part lies in [low, high].
          check_score() {
            local result_dir=$1 low=$2 high=$3
            local score whole
            score=$(sed -n '$p' "$result_dir"/*/summary/*.csv | awk -F ',' '{print $NF}')
            # Drop the fractional part: (( )) only does integer arithmetic.
            whole=${score%.*}
            # A missing summary or a non-numeric field would otherwise surface
            # as an arithmetic syntax error; fail with a clear message instead.
            if [[ ! $whole =~ ^[0-9]+$ ]]; then
              echo "no numeric score found under $result_dir (got '$score')"
              exit 1
            fi
            if (( whole >= low && whole <= high )); then
              echo "score is $score between $low and $high"
            else
              echo "score is $score not between $low and $high"
              exit 1
            fi
          }
          check_score "${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result1" 88 89
          check_score "${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result2" 87 88
          check_score "${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result3" 87 91
      - name: Uninstall opencompass
        # always(): remove the editable install even when an earlier step failed.
        if: always()
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          python3 -m pip uninstall opencompass -y
          conda info --envs

  # Posts a failure message to the webhook stored in secrets.WEBHOOK_URL
  # (Feishu, going by the job name) and mentions secrets.USER_ID.
  notify_to_feishu:
    # Runs only when a needed job failed, the run was not cancelled and the
    # ref is develop or main.
    # NOTE(review): for pull_request events github.ref_name is the PR merge
    # ref, so this presumably only fires for scheduled/manual runs -- confirm.
    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
    needs: [pr_run_test]
    environment: 'prod'
    timeout-minutes: 5
    runs-on: self-hosted
    steps:
      - name: notify
        # The URL is quoted so characters such as '&' or '?' in the secret are
        # not interpreted by the shell.
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' "${{ secrets.WEBHOOK_URL }}"