diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml
index 370316d1..822d73ba 100644
--- a/.github/workflows/model_test_cpu.yml
+++ b/.github/workflows/model_test_cpu.yml
@@ -32,6 +32,7 @@ env:
 jobs:
   Evaluation-Workflow:
     runs-on: aise-cluster-cpu
+    timeout-minutes: 10
     strategy:
       matrix:
         include:
@@ -113,21 +114,6 @@ jobs:
             cd ${{ env.OUT_SCRIPT_PATH }}
             ls -R
 
-      - name: Download Reference Artifact
-        id: download-artifact
-        uses: dawidd6/action-download-artifact@v3.1.2
-        with:
-          workflow: model_test_cpu.yml
-          name: FinalReport
-          run_id: ${{ vars.ModelTest_CPU_REF_ID }}
-          path: ${{ env.OUT_SCRIPT_PATH }}
-          name_is_regexp: true
-          repo: ${{ github.repository }}
-          check_artifacts: false
-          search_artifacts: false
-          skip_unpack: false
-          if_no_artifact_found: warn
-
       - name: Display structure of downloaded files
         run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R
 
@@ -135,24 +121,4 @@ jobs:
         run: |
-          echo "------ Generating final report.html ------"
+          echo "------ Generating model test summary ------"
           cd ${{ env.OUT_SCRIPT_PATH }}
-          mkdir -p generated
           /usr/bin/bash -x generate_report.sh
-        env:
-          RUN_DISPLAY_URL: https://https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
-          BUILD_NUMBER: ${{ github.run_id }}
-          JOB_STATUS: succeed
-
-      - name: Publish Report
-        uses: actions/upload-artifact@v4
-        if: ${{ !cancelled() }}
-        with:
-          name: FinalReport
-          path: ${{ env.OUT_SCRIPT_PATH }}/generated
-
-      - name: Specify performance regression
-        if: ${{ !cancelled() }}
-        run: |
-          if [ ${{ env.is_perf_reg }} == 'true' ]; then
-            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
-            exit 1
-          fi
diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml
index 3bea0b81..204d9044 100644
--- a/.github/workflows/model_test_hpu.yml
+++ b/.github/workflows/model_test_hpu.yml
@@ -32,6 +32,7 @@ env:
 jobs:
   Evaluation-Workflow:
     runs-on: aise-cluster-hpu
+    timeout-minutes: 10
     strategy:
       matrix:
         include:
@@ -114,21 +115,6 @@ jobs:
             cd ${{ env.OUT_SCRIPT_PATH }}
             ls -R
 
-      - name: Download Reference Artifact
-        id: download-artifact
-        uses: dawidd6/action-download-artifact@v3.1.2
-        with:
-          workflow: model_test_hpu.yml
-          name: FinalReport
-          run_id: ${{ vars.ModelTest_HPU_REF_ID }}
-          path: ${{ env.OUT_SCRIPT_PATH }}
-          name_is_regexp: true
-          repo: ${{ github.repository }}
-          check_artifacts: false
-          search_artifacts: false
-          skip_unpack: false
-          if_no_artifact_found: warn
-
       - name: Display structure of downloaded files
         run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R
 
@@ -136,24 +122,4 @@ jobs:
         run: |
-          echo "------ Generating final report.html ------"
+          echo "------ Generating model test summary ------"
           cd ${{ env.OUT_SCRIPT_PATH }}
-          mkdir -p generated
           /usr/bin/bash -x generate_report.sh
-        env:
-          RUN_DISPLAY_URL: https://https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
-          BUILD_NUMBER: ${{ github.run_id }}
-          JOB_STATUS: succeed
-
-      - name: Publish Report
-        uses: actions/upload-artifact@v4
-        if: ${{ !cancelled() }}
-        with:
-          name: FinalReport
-          path: ${{ env.OUT_SCRIPT_PATH }}/generated
-
-      - name: Specify performance regression
-        if: ${{ !cancelled() }}
-        run: |
-          if [ ${{ env.is_perf_reg }} == 'true' ]; then
-            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
-            exit 1
-          fi
diff --git a/.github/workflows/scripts/models/collect_log.sh b/.github/workflows/scripts/models/collect_log.sh
index d9c36650..936843ef 100644
--- a/.github/workflows/scripts/models/collect_log.sh
+++ b/.github/workflows/scripts/models/collect_log.sh
@@ -5,34 +5,39 @@
 
 set -eo pipefail
 set -x
-source /GenAIEval/.github/workflows/scripts/change_color
+
 WORKSPACE="/GenAIEval"
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
 for i in "$@"; do
     case $i in
-        --datasets*)
-            datasets=`echo $i | sed "s/${PATTERN}//"`;;
-        --device=*)
-            device=`echo $i | sed "s/${PATTERN}//"`;;
-        --model=*)
-            model=`echo $i | sed "s/${PATTERN}//"`;;
-        --tasks=*)
-            tasks=`echo $i | sed "s/${PATTERN}//"`;;
-        *)
-            echo "Parameter $i not recognized."; exit 1;;
+    --datasets=*) # require '=' like the other options so a malformed flag falls through to the error branch
+        datasets=$(echo "$i" | sed "s/${PATTERN}//") # quote $i to avoid word splitting/globbing of the value
+        ;;
+    --device=*)
+        device=$(echo "$i" | sed "s/${PATTERN}//")
+        ;;
+    --model=*)
+        model=$(echo "$i" | sed "s/${PATTERN}//")
+        ;;
+    --tasks=*)
+        tasks=$(echo "$i" | sed "s/${PATTERN}//")
+        ;;
+    *)
+        echo "Parameter $i not recognized."
+        exit 1
+        ;;
     esac
 done
 
 log_file="/log/${device}/${model}/${device}-${tasks}-${model}-${datasets}.log"
-$BOLD_YELLOW && echo "-------- Collect logs --------" && $RESET
-
+echo "Collecting logs ......"
 echo "working in"
 pwd
 if [[ ! -f ${log_file} ]]; then
-    echo "${device};${model};${tasks};${datasets};;" >> ${WORKSPACE}/summary.log
+    echo "|${device}|${model}|${tasks}|${datasets}|NaN|" >>${WORKSPACE}/summary.log
 else
     acc=$(grep -Po "acc .*(\d+(\.\d+)?)" ${log_file} | awk -F "|" '{print $3}' | head -n 1 | sed 's/.*://;s/[^0-9.]//g')
-    echo "${device};${model};${tasks};${datasets};${acc};" >> ${WORKSPACE}/summary.log
+    echo "|${device}|${model}|${tasks}|${datasets}|${acc}|" >>${WORKSPACE}/summary.log
 fi
diff --git a/.github/workflows/scripts/models/generate_report.sh b/.github/workflows/scripts/models/generate_report.sh
index f79d7ab8..0783395a 100644
--- a/.github/workflows/scripts/models/generate_report.sh
+++ b/.github/workflows/scripts/models/generate_report.sh
@@ -3,256 +3,25 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-set -x
-WORKSPACE=generated
-last_log_path=FinalReport
-summaryLog=${WORKSPACE}/summary.log
-summaryLogLast=${last_log_path}/summary.log
-PATTERN='[-a-zA-Z0-9_]*='
+set -xe
 
-function main {
-    echo "summaryLog: ${summaryLog}"
-    echo "summaryLogLast: ${summaryLogLast}"
-    echo "is_perf_reg=false" >> "$GITHUB_ENV"
-    preprocessing
-    generate_html_head
-    generate_html_overview
-    generate_results
-    generate_html_footer
+function generate_header { # writes the Markdown title and table header rows to the step summary file
+    echo "### Model Test Summary" >>"$GITHUB_STEP_SUMMARY"
+    echo "|device|model|tasks|datasets|acc|" >>"$GITHUB_STEP_SUMMARY"
+    echo "| :----: | :----: | :----: | :----: | :----: |" >>"$GITHUB_STEP_SUMMARY"
 }
 
 function preprocessing {
-    for file_path in log/*
-    do
+    for file_path in log/*; do # append every log/<dir>/summary.log to the step summary
         if [[ -d ${file_path} ]] && [[ -f ${file_path}/summary.log ]]; then
-            cat ${file_path}/summary.log >> ${summaryLog}
+            cat "${file_path}/summary.log" >>"$GITHUB_STEP_SUMMARY"
         fi
     done
 }
 
-function generate_html_overview {
-    Test_Info_Title="<th colspan="4">Test Branch</th> <th colspan="4">Commit ID</th> "
-    Test_Info="<th colspan="4">${MR_source_branch}</th> <th colspan="4">${ghprbActualCommit}</th> "
-
-    cat >>${WORKSPACE}/report.html <<eof
-
-<body>
-    <div id="main">
-        <h1 align="center">GenAIEval Tests
-        [ <a href="${RUN_DISPLAY_URL}">Job-${BUILD_NUMBER}</a> ]</h1>
-      <h1 align="center">Test Status: ${JOB_STATUS}</h1>
-        <h2>Summary</h2>
-        <table class="features-table">
-            <tr>
-              <th>Repo</th>
-              ${Test_Info_Title}
-              </tr>
-              <tr>
-                    <td><a href="https://github.com/opea-project/GenAIEval">GenAIEval</a></td>
-              ${Test_Info}
-                </tr>
-        </table>
-eof
-}
-
-function generate_results {
-    cat >>${WORKSPACE}/report.html <<eof
-    <h2>Performance</h2>
-      <table class="features-table">
-        <tr>
-          <th>Device</th>
-          <th>Tasks</th>
-          <th>Model</th>
-          <th>Datasets</th>
-          <th>VS</th>
-          <th>Accuracy</th>
-        </tr>
-eof
-
-    devices=$(cat ${summaryLog} | cut -d';' -f1 | awk '!a[$0]++')
-    for device in ${devices[@]}; do
-        models=$(cat ${summaryLog} | grep "${device};" | cut -d';' -f2 | awk '!a[$0]++')
-        for model in ${models[@]}; do
-            tasks=$(cat ${summaryLog} | grep "${device};${model};" | cut -d';' -f3 | awk '!a[$0]++')
-            for task in ${tasks[@]}; do
-                datasets=$(cat ${summaryLog} | grep "${device};${model};${task};" | cut -d';' -f4 | awk '!a[$0]++')
-                for dataset in ${datasets[@]}; do
-                    benchmark_pattern="${device};${model};${task};${dataset};"
-                    acc=$(cat ${summaryLog} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
-                    acc_last=nan
-                    if [ $(cat ${summaryLogLast} | grep -c "${benchmark_pattern}") != 0 ]; then
-                        acc_last=$(cat ${summaryLogLast} | grep "${benchmark_pattern}" | cut -d';' -f5 | awk '!a[$0]++')
-                    fi
-                    generate_core
-                done
-            done
-        done
-    done
-    cat >>${WORKSPACE}/report.html <<eof
-    </table>
-eof
-}
-
-function generate_core {
-    echo "<tr><td rowspan=3>${device}</td><td rowspan=3>${model}</td><td rowspan=3>${task}</td><td rowspan=3>${dataset}</td><td>New</td>" >>${WORKSPACE}/report.html
-    echo | awk -v acc=${acc} -v acc_l=${acc_last} '
-        function show_benchmark(a) {
-            if(a ~/[1-9]/) {
-                printf("<td>%.2f</td>\n",a);
-            }else {
-                printf("<td></td>\n");
-            }
-        }
-        function compare_new_last(a,b){
-            if(a ~/[1-9]/ && b ~/[1-9]/) {
-                target = b / a;
-                if(target >= 0.945) {
-                    status_png = "background-color:#90EE90";
-                }else {
-                    status_png = "background-color:#FFD2D2";
-                    job_status = "fail"
-                }
-                printf("<td style=\"%s\">%.2f</td>", status_png, target);
-            }else{
-                if(a == ""){
-                    job_status = "fail"
-                    status_png = "background-color:#FFD2D2";
-                    printf("<td style=\"%s\"></td>", status_png);
-                }else{
-                    printf("<td class=\"col-cell col-cell3\"></td>");
-                }
-            }
-        }
-        BEGIN {
-            job_status = "pass"
-        }{
-            // current
-            show_benchmark(acc)
-            // Last
-            printf("</tr>\n<tr><td>Last</td>")
-            show_benchmark(acc_l)
-            // current vs last
-            printf("</tr>\n<tr><td>New/Last</td>");
-            compare_new_last(acc,acc_l)
-            printf("</tr>\n");
-        } END{
-          printf("\n%s", job_status);
-        }
-    ' >>${WORKSPACE}/report.html
-    job_state=$(tail -1 ${WORKSPACE}/report.html)
-    sed -i '$s/.*//' ${WORKSPACE}/report.html
-    if [ ${job_state} == 'fail' ]; then
-        echo "is_perf_reg=true" >> "$GITHUB_ENV"
-    fi
-}
-
-function generate_html_head {
-    cat >${WORKSPACE}/report.html <<eof
-<!DOCTYPE html>
-<html lang="en">
-
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Daily Tests - TensorFlow - Jenkins</title>
-    <style>
-        body {
-            margin: 0;
-            padding: 0;
-            background: white no-repeat left top;
-        }
-
-        #main {
-            // width: 100%;
-            margin: 20px auto 10px auto;
-            background: white;
-            -moz-border-radius: 8px;
-            -webkit-border-radius: 8px;
-            padding: 0 30px 30px 30px;
-            border: 1px solid #adaa9f;
-            -moz-box-shadow: 0 2px 2px #9c9c9c;
-            -webkit-box-shadow: 0 2px 2px #9c9c9c;
-        }
-
-        .features-table {
-            width: 100%;
-            margin: 0 auto;
-            border-collapse: separate;
-            border-spacing: 0;
-            text-shadow: 0 1px 0 #fff;
-            color: #2a2a2a;
-            background: #fafafa;
-            background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff);
-            /* Firefox 3.6 */
-            background-image: -webkit-gradient(linear, center bottom, center top, from(#fff), color-stop(0.5, #eaeaea), to(#fff));
-            font-family: Verdana, Arial, Helvetica
-        }
-
-        .features-table th,
-        td {
-            text-align: center;
-            height: 25px;
-            line-height: 25px;
-            padding: 0 8px;
-            border: 1px solid #cdcdcd;
-            box-shadow: 0 1px 0 white;
-            -moz-box-shadow: 0 1px 0 white;
-            -webkit-box-shadow: 0 1px 0 white;
-            white-space: nowrap;
-        }
-
-        .no-border th {
-            box-shadow: none;
-            -moz-box-shadow: none;
-            -webkit-box-shadow: none;
-        }
-
-        .col-cell {
-            text-align: center;
-            width: 150px;
-            font: normal 1em Verdana, Arial, Helvetica;
-        }
-
-        .col-cell3 {
-            background: #efefef;
-            background: rgba(144, 144, 144, 0.15);
-        }
-
-        .col-cell1,
-        .col-cell2 {
-            background: #B0C4DE;
-            background: rgba(176, 196, 222, 0.3);
-        }
-
-        .col-cellh {
-            font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial;
-            -moz-border-radius-topright: 10px;
-            -moz-border-radius-topleft: 10px;
-            border-top-right-radius: 10px;
-            border-top-left-radius: 10px;
-            border-top: 1px solid #eaeaea !important;
-        }
-
-        .col-cellf {
-            font: bold 1.4em Georgia;
-            -moz-border-radius-bottomright: 10px;
-            -moz-border-radius-bottomleft: 10px;
-            border-bottom-right-radius: 10px;
-            border-bottom-left-radius: 10px;
-            border-bottom: 1px solid #dadada !important;
-        }
-    </style>
-</head>
-eof
-}
-
-function generate_html_footer {
-    cat >>${WORKSPACE}/report.html <<eof
-    </div>
-</body>
-</html>
-eof
+function main {
+    generate_header
+    preprocessing
 }
 
 main
diff --git a/.github/workflows/scripts/models/model_test.sh b/.github/workflows/scripts/models/model_test.sh
index 9b1d4a8e..a593d4ef 100644
--- a/.github/workflows/scripts/models/model_test.sh
+++ b/.github/workflows/scripts/models/model_test.sh
@@ -5,8 +5,11 @@
 
 set -o pipefail
 set -x
-source /GenAIEval/.github/workflows/scripts/change_color
 git config --global --add safe.directory /GenAIEval
+
+export TQDM_POSITION=-1    # fix progress bar on tty mode
+export TQDM_MININTERVAL=60 # set refresh every 60s
+
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
 PERF_STABLE_CHECK=true
@@ -33,7 +36,7 @@ main() {
         "code-generation")
             working_dir="/GenAIEval/evals/evaluation/bigcode_evaluation_harness/examples";;
         *)
-            echo "Not suppotted task"; exit 1;;
+            echo "Not supported task"; exit 1;;
     esac
     if [[ ${model} == *"opt"* ]]; then
         pretrained="facebook/${model}"
@@ -47,25 +50,12 @@ main() {
     fi
     log_dir="/log/${device}/${model}"
     mkdir -p ${log_dir}
-    $BOLD_YELLOW && echo "-------- evaluation start --------" && $RESET
     run_benchmark
     cp ${log_dir}/${device}-${tasks}-${model}-${datasets}.log /GenAIEval/
 }
 
-function prepare() {
-    ## prepare env
-    cd ${working_dir}
-    echo "Working in ${working_dir}"
-    echo -e "\nInstalling model requirements..."
-    if [ -f "requirements.txt" ]; then
-        python -m pip install -r requirements.txt
-        pip list
-    else
-        echo "Not found requirements.txt file."
-    fi
-}
-
 function run_benchmark() {
+    echo "::group::evaluation start"
     cd ${working_dir}
     overall_log="${log_dir}/${device}-${tasks}-${model}-${datasets}.log"
     python main.py \
@@ -74,13 +64,14 @@ function run_benchmark() {
         --tasks ${datasets} \
         --device ${device} \
         --batch_size 112  2>&1 | tee ${overall_log}
+    status=$? # pipeline status (pipefail is set above); must be read before any other command resets $?
+    echo "::endgroup::"
 
-    echo "print log content:"
-    cat ${overall_log}
-    status=$?
     if [ ${status} != 0 ]; then
-        echo "Evaluation process returned non-zero exit code."
+        echo "::error::Evaluation process returned non-zero exit code!"
         exit 1
+    else
+        echo "Evaluation process completed successfully!"
     fi
 }