<!-- index.html -->

<!DOCTYPE html>
<html lang="en">
  <head>
    <!-- Encoding comes first so the parser knows it before it reads any text content. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    <!-- A document may contain only one <title>; keep the single descriptive one. -->
    <title>MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI</title>
    <meta name="description"
          content="A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI">
    <meta name="keywords" content="MMMU, LMM, LMM Evaluation, Vision Language Model, Large Language Model, Large Multimodal Model, artificial intelligence, AI, AGI, artificial general intelligence">

    <link rel="icon" href="./static/images/mmmu_icon2.png">

    <!-- Web fonts and stylesheets -->
    <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">

    <link rel="stylesheet" href="./static/css/bulma.min.css">
    <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
    <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
    <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
    <link rel="stylesheet" href="./static/css/index.css">

    <!-- Scripts: relative order is preserved because index.js is loaded last, after the libraries. -->
    <!-- NOTE(review): chart.js is requested without a version, so the CDN serves its latest release; consider pinning. -->
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <!-- NOTE(review): two Font Awesome kits plus a local copy are loaded; confirm whether all three are needed. -->
    <script src="https://kit.fontawesome.com/f8ddf9854a.js" crossorigin="anonymous"></script>
    <script src="https://kit.fontawesome.com/fff5b27ec1.js" crossorigin="anonymous"></script>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script defer src="./static/js/fontawesome.all.min.js"></script>
    <script src="./static/js/bulma-carousel.min.js"></script>
    <script src="./static/js/bulma-slider.min.js"></script>
    <script src="./static/js/index.js"></script>
  </head>
  <body>

    <!-- Top navigation bar: a burger toggle and a "More Research" dropdown linking to related projects. -->
    <!-- <nav> already carries the navigation landmark role, so no explicit role attribute is set. -->
    <nav class="navbar" aria-label="main navigation">
      <div class="navbar-brand">
        <!-- NOTE(review): this toggle has no href or tabindex, so it is not keyboard-focusable; its click
             handling is presumably in static/js/index.js. Confirm before converting it to a <button>. -->
        <a role="button" class="navbar-burger" aria-label="menu" aria-expanded="false">
          <span aria-hidden="true"></span>
          <span aria-hidden="true"></span>
          <span aria-hidden="true"></span>
        </a>
      </div>
      <div class="navbar-menu">
        <div class="navbar-start" style="flex-grow: 1; justify-content: center;">
          <div class="navbar-item has-dropdown is-hoverable">
            <a class="navbar-link">
              More Research
            </a>
            <div class="navbar-dropdown">
              <a class="navbar-item" href="https://huggingface.co/datasets/MMMU/MMMU_Pro">
                <!-- The emoji is decorative, so it is hidden from assistive technology. -->
                <b>MMMU-Pro</b> <span style="font-size:18px; display: inline; margin-left: 5px;" aria-hidden="true">🔥</span>
              </a>
              <a class="navbar-item" href="https://tiger-ai-lab.github.io/MAmmoTH/">
                MAmmoTH
              </a>
              <a class="navbar-item" href="https://osu-nlp-group.github.io/TableLlama/">
                TableLlama
              </a>
              <a class="navbar-item" href="https://osu-nlp-group.github.io/MagicBrush/">
                MagicBrush
              </a>
              <a class="navbar-item" href="https://osu-nlp-group.github.io/Mind2Web/">
                Mind2Web
              </a>
            </div>
          </div>
        </div>
      </div>
    </nav>

    <!-- Title banner: project name, author list, contact addresses and quick-link buttons. -->
    <section class="hero">
      <div class="hero-body">
        <div class="container is-max-desktop">
          <div class="columns is-centered">
            <div class="column has-text-centered">

              <!-- Project logo, name and tagline -->
              <h1 class="title is-1 publication-title is-bold">
                <img style="width:1em;vertical-align: middle" src="static/images/mmmu_icon2.png" alt="Logo">
                <span class="mmmu" style="vertical-align: middle">MMMU</span>
              </h1>
              <h2 class="subtitle is-3 publication-subtitle">
                A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI
              </h2>

              <!-- Authors: "*" marks core contributors, "†" marks corresponding authors (legend below). -->
              <div class="is-size-5 publication-authors">
                <span class="author-block"><a style="text-decoration: none; color: inherit;" href="https://xiangyue9607.github.io/">Xiang Yue*†</a>,</span>
                <span class="author-block"><a style="text-decoration: none; color: inherit;" href="https://yuanshengni.github.io/">Yuansheng Ni*</a>,</span>
                <span class="author-block"><a style="text-decoration: none; color: inherit;" href="https://drogozhang.github.io/">Kai Zhang*</a>,</span>
                <span class="author-block"><a style="text-decoration: none; color: inherit;" href="https://scholar.google.com/citations?hl=en&user=Vq-VZnUAAAAJ">Tianyu Zheng*</a>,</span>
                <br>
                <span class="author-block">Ruoqi Liu,</span>
                <span class="author-block">Ge Zhang,</span>
                <span class="author-block">Samuel Stevens,</span>
                <span class="author-block">Dongfu Jiang,</span>
                <span class="author-block">Weiming Ren,</span>
                <span class="author-block">Yuxuan Sun,</span>
                <span class="author-block">Cong Wei,</span>
                <span class="author-block">Botao Yu,</span>
                <span class="author-block">Ruibin Yuan,</span>
                <span class="author-block">Renliang Sun,</span>
                <span class="author-block">Ming Yin,</span>
                <span class="author-block">Boyuan Zheng,</span>
                <span class="author-block">Zhenzhu Yang,</span>
                <span class="author-block">Yibo Liu,</span>
                <span class="author-block">Wenhao Huang,</span><br>
                <span class="author-block"><a style="text-decoration: none; color: inherit;" href="https://web.cse.ohio-state.edu/~sun.397/">Huan Sun*</a>,</span>
                <span class="author-block"><a style="text-decoration: none; color: inherit;" href="https://ysu1989.github.io/">Yu Su*†</a>,</span>
                <span class="author-block"><a style="text-decoration: none; color: inherit;" href="https://wenhuchen.github.io/">Wenhu Chen*†</a></span>
              </div>

              <br>

              <div class="is-size-5 publication-authors">
                <span class="author-block"><b>MMMU Team</b></span>
              </div>

              <br>
              <!-- Legend for the author markers and contact e-mail addresses -->
              <div class="is-size-5 publication-authors">
                <span class="author-block">*Core Contributors</span><br>
                <span class="author-block">†Corresponding to:</span>
                <span class="author-block"><a href="mailto:xiangyue.work@gmail.com">xiangyue.work@gmail.com</a>,</span>
                <span class="author-block"><a href="mailto:su.809@osu.edu">su.809@osu.edu</a>,</span>
                <span class="author-block"><a href="mailto:wenhuchen@uwaterloo.ca">wenhuchen@uwaterloo.ca</a></span>
              </div>

              <!-- Quick links: paper, datasets, code, leaderboard, evaluation server, announcement, examples -->
              <div class="column has-text-centered">
                <div class="publication-links">
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="https://arxiv.org/abs/2311.16502">
                      <span class="icon"><i class="fas fa-file-pdf"></i></span>
                      <span>arXiv</span>
                    </a>
                  </span>
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="https://huggingface.co/datasets/MMMU/MMMU_Pro">
                      <span class="icon" style="font-size:18px">🤗</span>
                      <span>MMMU-Pro</span>
                    </a>
                  </span>
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="https://huggingface.co/datasets/MMMU/MMMU">
                      <span class="icon" style="font-size:18px">🤗</span>
                      <span>MMMU</span>
                    </a>
                  </span>
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="https://github.com/MMMU-Benchmark/MMMU">
                      <span class="icon"><i class="fab fa-github"></i></span>
                      <span>Code</span>
                    </a>
                  </span>
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="#leaderboard">
                      <span class="icon has-text-white"><i class="fa-solid fa-trophy"></i></span>
                      <span>Leaderboard</span>
                    </a>
                  </span>
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="https://eval.ai/web/challenges/challenge-page/2179/overview">
                      <span class="icon has-text-white"><i class="fa-solid fa-medal"></i></span>
                      <span>EvalAI</span>
                    </a>
                  </span>
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="https://twitter.com/xiangyue96/status/1729698316554801358">
                      <span class="icon has-text-white"><i class="fa-brands fa-x-twitter"></i></span>
                      <span>Twitter</span>
                    </a>
                  </span>
                  <span class="link-block">
                    <a class="external-link button is-normal is-rounded is-dark" href="#examples">
                      <span class="icon has-text-white"><i class="fa-solid fa-book"></i></span>
                      <span>Examples</span>
                    </a>
                  </span>
                </div>
              </div>

            </div>
          </div>
        </div>
      </div>
    </section>

    <!-- Teaser: the dataset overview figure followed by a caption listing the benchmark's four challenges. -->
    <section class="hero teaser">
      <div class="container is-max-desktop has-text-centered">
        <!-- Alt text mirrors the caption below so that it describes the overview figure itself. -->
        <img src="static/images/overview_mmlu.Jpeg" alt="Overview of the MMMU dataset and its four challenges">
        <p>Overview of the MMMU dataset. MMMU presents four challenges:
          1) <b>comprehensiveness</b>: 11.5K college-level problems across six broad disciplines and 30 college subjects;
          2) highly <b>heterogeneous</b> image types;
          3) <b>interleaved</b> text and images;
          4) <b>expert-level</b> perception and reasoning rooted in deep subject knowledge.
        </p>
      </div>
    </section>

    <!-- News items (newest first) followed by the introduction / abstract of the benchmark. -->
    <section class="section">
      <div class="container" style="margin-bottom: 2vh;">
        <div class="columns is-centered has-text-centered">
          <div class="column is-four-fifths">
            <!-- News: every entry uses the same "[YYYY-MM-DD]:" prefix. -->
            <h2 class="title is-3">🔔News</h2>
            <div class="content has-text-justified">
              <p>
                <b>🔥[2024-09-05]: Introducing <a href="https://arxiv.org/abs/2409.02813">MMMU-Pro</a>, a robust version of MMMU benchmark for multimodal AI evaluation! 🚀</b>
              </p>
              <p>
                <b>🚀[2024-01-31]: We added Human Expert performance on the <a href="#leaderboard">Leaderboard</a>!🌟</b>
              </p>
              <p>
                <!-- The link text is already bold through the enclosing <b>, so it needs no nested <b>. -->
                <b>🔥[2023-12-04]: Our evaluation server for the test set is now available on <a href="https://eval.ai/web/challenges/challenge-page/2179/overview">EvalAI</a>. We welcome all submissions and look forward to your participation! 😆</b>
              </p>
            </div>
            <!-- Introduction (abstract) -->
            <h2 class="title is-3">Introduction</h2>
            <div class="content has-text-justified">
              <p>
                We introduce MMMU: a new benchmark designed to evaluate multimodal models on massive multi-discipline tasks demanding college-level subject knowledge and deliberate reasoning. MMMU includes <b>11.5K</b> meticulously collected multimodal questions from college exams, quizzes, and textbooks, covering six core disciplines: Art & Design, Business, Science, Health & Medicine, Humanities & Social Science, and Tech & Engineering. These questions span <b>30</b> subjects and <b>183</b> subfields, comprising 30 highly heterogeneous image types, such as charts, diagrams, maps, tables, music sheets, and chemical structures. Unlike existing benchmarks, MMMU focuses on advanced perception and reasoning with domain-specific knowledge, challenging models to perform tasks akin to those faced by experts. Our evaluation of 14 open-source LMMs and the proprietary GPT-4V(ision) highlights the substantial challenges posed by MMMU. Even the advanced GPT-4V only achieves a 56% accuracy, indicating significant room for improvement. We believe MMMU will stimulate the community to build next-generation multimodal foundation models towards expert artificial general intelligence.
              </p>
            </div>
          </div>
        </div>
      </div>
    </section>

    <!-- Banner heading that opens the dataset part of the page -->
    <section class="hero is-light is-small">
      <div class="hero-body has-text-centered">
        <h1 class="title is-1 mmmu">
          <img class="mmmu-logo" src="static/images/mmmu_icon2.png" alt="Logo">
          <span class="mmmu">MMMU Benchmark</span>
        </h1>
      </div>
    </section>

    <!-- Benchmark details: overview text, comparison with existing benchmarks, and a statistics carousel. -->
    <section class="section">
      <div class="container">
        <!-- Overview -->
        <div class="columns is-centered has-text-centered">
          <div class="column is-four-fifths">
            <h2 class="title is-3">Overview</h2>
            <div class="content has-text-justified">
              <p>
                We introduce the Massive Multi-discipline Multimodal Understanding and Reasoning (MMMU) benchmark, a novel benchmark meticulously curated to assess the expert-level multimodal understanding capability of foundation models across a broad scope of tasks. It covers 30 subjects across 6 disciplines, including Art & Design, Business, Health & Medicine, Science, Humanities & Social Science, and Tech & Engineering, and 183 subfields. The detailed subject coverage and statistics are presented in the Statistics figures below. The questions in our benchmark were manually collected by a team of college students (including coauthors) from various disciplines and subjects, drawing from online sources, textbooks, and lecture materials.
              </p>
              <!-- NOTE(review): alt text inferred from the file name (mmlu_example); confirm it matches the image. -->
              <img src="static/images/mmlu_example.Jpeg" alt="Sampled MMMU examples from each discipline" class="center">
              <br>
              <p>
                 MMMU is designed to measure three essential skills in LMMs: perception, knowledge, and reasoning. Our aim is to evaluate how well these models can not only perceive and understand information across different modalities but also apply reasoning with subject-specific knowledge to derive the solution.
              </p>
              <p>
                Our MMMU benchmark introduces key challenges to multimodal foundation models, as summarized in the overview figure above. Among these, we particularly highlight the challenge stemming from the requirement for both expert-level visual perceptual abilities and deliberate reasoning with subject-specific knowledge. This challenge is vividly illustrated through our tasks, which not only demand the processing of various heterogeneous image types but also necessitate a model's adeptness in using domain-specific knowledge to deeply understand both the text and images and to reason. This goes significantly beyond basic visual perception, calling for an advanced approach that integrates advanced multimodal analysis with domain-specific knowledge.
              </p>
            </div>
          </div>
        </div>

        <!-- Comparison with existing benchmarks -->
        <div class="columns is-centered has-text-centered">
          <div class="column is-four-fifths">
            <h2 class="title is-3">Comparisons with Existing Benchmarks</h2>
            <div class="content has-text-justified">
              <p>
                To further distinguish MMMU from other existing benchmarks, we elaborate the benchmark details in the figure below.
                From the <i>breadth</i> perspective, the prior benchmarks are heavily focused on daily knowledge and common sense.
                The covered image format is also limited. Our benchmark aims to cover college-level knowledge with 30 image formats including diagrams,
                tables, charts, chemical structures, photos, paintings, geometric shapes, music sheets, medical images, etc.
                In the <i>depth</i> aspect, the previous benchmarks normally require commonsense knowledge or simple physical or temporal reasoning.
                In contrast, our benchmark requires deliberate reasoning with college-level subject knowledge.
              </p>
              <div class="content has-text-centered">
                <!-- NOTE(review): alt text and caption inferred from the file name (compare) and the paragraph above; confirm. -->
                <img src="static/images/compare.Jpeg" alt="Comparison of MMMU with existing benchmarks" class="center">
                <p> Comparison between MMMU and other existing benchmarks in terms of breadth and depth.</p>
              </div>
            </div>
          </div>
        </div>

        <!-- Statistics carousel: one boxed figure with caption per slide -->
        <div class="columns is-centered m-6">
          <div class="column is-full has-text-centered content">
            <h2 class="title is-3">Statistics</h2>
            <div class="carousel results-carousel">
              <div class="box m-5">
                <div class="content has-text-centered">
                  <!-- NOTE(review): alt text and caption inferred from the file name (mmmu_subject_distribution); confirm. -->
                  <img src="static/images/mmmu_subject_distribution.Jpeg" alt="Distribution of MMMU questions across disciplines and subjects" width="95%">
                  <p> MMMU contains 11.5K multimodal questions covering six broad disciplines, 30 subjects, and 183 subfields.</p>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/statistics.png" alt="Key statistics of the MMMU benchmark" width="40%">
                  <p> Key statistics of the MMMU benchmark</p>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/image_type_count.png" alt="Distribution of image types in the MMMU dataset" width="80%">
                  <p> Distribution of image types in the MMMU dataset</p>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </section>

    <!-- Banner heading that opens the experiment-results part of the page -->
    <section class="hero is-light is-small">
      <div class="hero-body has-text-centered">
        <h1 class="title is-1 mmmu">
          Experiment Results
        </h1>
      </div>
    </section>

    <section class="section">
      <div class="container">
        <!-------------------------------------------------------------------- RESULTS SECTION -------------------------------------------------------------------->
        <!-- Leaderboard block: evaluation-setup text, a category legend, a usage hint and the results table. -->
        <div class="columns is-centered m-6">
          <div class="column is-full has-text-centered content">
            <!-- id="leaderboard" is the target of the in-page "#leaderboard" links elsewhere on this page. -->
            <h2 class="title is-3" id="leaderboard">Leaderboard</h2>
            <div class="content has-text-justified">
              <p>
                We evaluate various models including LLMs and LMMs.
                In each type, we consider both closed- and open-source models.
                Our evaluation is conducted under a zero-shot setting to assess the capability of models to generate accurate answers without fine-tuning or few-shot demonstrations on our benchmark.
                For all models, we use the default prompt provided by each model for multi-choice or open QA, if available.
                If models do not provide prompts for task types in MMMU, we conduct prompt engineering on the validation set and use the most effective prompt for the later zero-shot experiment.
              </p>
            </div>
            <br>
            <!-- Legend for the three entry categories shown in the table. -->
            <div class="model-labels-container">
              <span class="leaderboard-label human_expert">Human Expert</span>
              <span class="leaderboard-label open_source">Open-Source</span>
              <span class="leaderboard-label proprietary">Proprietary</span>
            </div>
            <br>
            <div class="content has-text-centered">
              <p>
                Click on MMMU-Pro, MMMU (Val) or MMMU (Test) to expand detailed results.
              </p>
            </div>
            <div class="leaderboard-container">
              <div class="table-wrapper">
                <!-- NOTE(review): the classes on the header cells (clickable, sortable, hidden, *-details,
                     *-overall, *-details-cell, reset-cell) and the data-sort attributes are presumably hooks
                     for static/js/index.js and static/css/index.css; confirm there before renaming or
                     reordering any column. -->
                <table id="mmmu-table">
                  <thead>
                    <!-- Header row 1: a Reset cell spanning the three identity columns, then one group header per benchmark split. -->
                    <tr>
                      <th colspan="3" class="reset-cell clickable" style="text-align: center;">Reset</th>
                      <th class="pro-details-cell clickable" colspan="1">MMMU-Pro</th>
                      <th class="val-details-cell clickable" colspan="1">MMMU(Val)</th>
                      <th class="test-details-cell clickable" colspan="1">MMMU(Test)</th>
                    </tr>
                    <!-- Header row 2: identity columns, then for each group an Overall column followed by its detail columns (marked "hidden"). -->
                    <tr>
                      <th class="sortable clickable" data-sort="string">Name</th>
                      <!-- NOTE(review): unlike its neighbours this cell has no "sortable" class; confirm that is intentional. -->
                      <th class="clickable" data-sort="string">Size</th>
                      <th class="sortable clickable" data-sort="date">Date</th>
                      <!-- MMMU-Pro group -->
                      <th class="sortable clickable pro-overall" data-sort="number">Overall</th>
                      <th class="hidden pro-details sortable clickable" data-sort="number">Vision</th>
                      <th class="hidden pro-details sortable clickable" data-sort="number">Standard</th>
                      <!-- MMMU (Val) group -->
                      <th class="sortable clickable val-overall" data-sort="number">Overall</th>
                      <th class="hidden val-details sortable clickable" data-sort="number">Art & Design</th>
                      <th class="hidden val-details sortable clickable" data-sort="number">Business</th>
                      <th class="hidden val-details sortable clickable" data-sort="number">Science</th>
                      <th class="hidden val-details sortable clickable" data-sort="number">Health & Medicine</th>
                      <th class="hidden val-details sortable clickable" data-sort="number">Human. & Social Sci.</th>
                      <th class="hidden val-details sortable clickable" data-sort="number">Tech & Eng.</th>
                      <!-- MMMU (Test) group -->
                      <th class="sortable clickable test-overall" data-sort="number">Overall</th>
                      <th class="hidden test-details sortable clickable" data-sort="number">Art & Design</th>
                      <th class="hidden test-details sortable clickable" data-sort="number">Business</th>
                      <th class="hidden test-details sortable clickable" data-sort="number">Science</th>
                      <th class="hidden test-details sortable clickable" data-sort="number">Health & Medicine</th>
                      <th class="hidden test-details sortable clickable" data-sort="number">Human. & Social Sci.</th>
                      <th class="hidden test-details sortable clickable" data-sort="number">Tech & Eng.</th>
                    </tr>
                  </thead>
                  <tbody>
                    <!-- Intentionally empty in the markup: the table body is populated dynamically. -->
                  </tbody>
                </table>
                <!-- Caption for the table, including the legend for bold, underline and "*". -->
                <p class="test-desc"> Overall results of different models on the MMMU leaderboard. The best-performing model in each category is <b>in-bold</b>, and the second best is <u>underlined</u>. *: results provided by the authors.</p>
              </div>
            </div>
          </div>
        </div>
        <!-------------------------------------------------------------------- Image Type SECTION -------------------------------------------------------------------->
        <div class="columns is-centered m-6">
          <div class="column is-full has-text-centered content">
            <h2 class="title is-3">Different Image Types</h2>
            <div class="content has-text-justified">
              <p>
                We compare the performance of various models across top frequent image types.
                Across all types, GPT-4V consistently outperforms the other models by a huge margin.
                Open-source models demonstrate relatively strong performance in categories like Photos and Paintings, which are more frequently seen during training.
                However, for less common image categories like Geometric shapes, Music sheets and Chemical structures, all models obtain very low scores (some are close to random guesses).
                This indicates that the existing models are generalizing poorly towards these image types.
              </p>
            </div>
            <div class="model-labels-container">
              <span class="model-label" style="background-color: rgba(196, 123, 160, 0.5);">Adept Fuyu-8B</span>
              <span class="model-label" style="background-color: rgba(245, 123, 113, 0.5);">Qwen-VL-7B-Chat</span>
              <span class="model-label" style="background-color: rgba(255, 208, 80, 0.5);">LLaVA-1.5-13B</span>
              <span class="model-label" style="background-color: rgba(110, 194, 134, 0.5);">InstructBLIP-T5-XXL</span>
              <span class="model-label" style="background-color: rgba(255, 153, 78, 0.5);">BLIP-2 FLAN-T5-XXL</span>
              <span class="model-label" style="background-color: rgba(42, 149, 235, 0.5);">Yi-VL-34B</span>
              <span class="model-label" style="background-color: rgba(183, 156, 220, 0.5);">LLaVA-1.6-34B</span>
              <span class="model-label" style="background-color: rgba(143, 169, 209, 0.5);">InternVL-Chat-V1.2</span>
              <span class="model-label" style="background-color: rgba(72, 199, 176, 0.5);">VILA1.5</span>
              <span class="model-label" style="background-color: rgba(117, 209, 215, 0.5);">GPT-4V</span>
            </div>
            <div class="content has-text-centered">
              <div class="chart-grid">
                <!-- Chart 1: Diagrams -->
                <div class="chart-item">
                    <canvas id="chart_Diagrams"></canvas>
                    <p class="chart-label">Diagrams (3184)</p>
                </div>
                <!-- Chart 2: Tables -->
                <div class="chart-item">
                    <canvas id="chart_Tables"></canvas>
                    <p class="chart-label">Tables (2267)</p>
                </div>
                <!-- Chart 3: Plots and Charts -->
                <div class="chart-item">
                  <canvas id="chart_PlotsAndCharts"></canvas>
                  <p class="chart-label">Plots and Charts (840)</p>
                </div>
                <!-- Chart 4: Chemical Structures -->
                <div class="chart-item">
                  <canvas id="chart_ChemicalStructures"></canvas>
                  <p class="chart-label">Chemical Structures (573)</p>
                </div>
                <!-- Chart 5: Photographs -->
                <div class="chart-item">
                  <canvas id="chart_Photographs"></canvas>
                  <p class="chart-label">Photographs (770)</p>
                </div>
                <!-- Chart 6: Paintings -->
                <div class="chart-item">
                  <canvas id="chart_Paintings"></canvas>
                  <p class="chart-label">Paintings (453)</p>
                </div>
                <!-- Chart 7: Geometric Shapes -->
                <div class="chart-item">
                  <canvas id="chart_GeometricShapes"></canvas>
                  <p class="chart-label">Geometric Shapes (336)</p>
                </div>
                <!-- Chart 8: Sheet Music -->
                <div class="chart-item">
                  <canvas id="chart_SheetMusic"></canvas>
                  <p class="chart-label">Sheet Music (335)</p>
                </div>
                <!-- Chart 9: Medical Images -->
                <div class="chart-item">
                  <canvas id="chart_MedicalImages"></canvas>
                  <p class="chart-label">Medical Images (272)</p>
                </div>
                <!-- Chart 10: Pathological Images -->
                <div class="chart-item">
                  <canvas id="chart_PathologicalImages"></canvas>
                  <p class="chart-label">Pathological Images (253)</p>
                </div>
                <!-- Chart 11: Microscopic Images -->
                <div class="chart-item">
                  <canvas id="chart_MicroscopicImages"></canvas>
                  <p class="chart-label">Microscopic Images (226)</p>
                </div>
                <!-- Chart 12: MRI, CT scans, and X-rays -->
                <div class="chart-item">
                  <canvas id="chart_MRIsCTScansXrays"></canvas>
                  <p class="chart-label">MRI, CT scans, and X-rays (198)</p>
                </div>
                <!-- Chart 13: Sketches and Drafts -->
                <div class="chart-item">
                  <canvas id="chart_SketchesAndDrafts"></canvas>
                  <p class="chart-label">Sketches and Drafts (184)</p>
                </div>
                <!-- Chart 14: Maps -->
                <div class="chart-item">
                  <canvas id="chart_Maps"></canvas>
                  <p class="chart-label">Maps (170)</p>
                </div>
                <!-- Chart 15: Technical Blueprints -->
                <div class="chart-item">
                  <canvas id="chart_TechnicalBlueprints"></canvas>
                  <p class="chart-label">Technical Blueprints (162)</p>
                </div>
                <!-- Chart 16: Trees and Graphs -->
                <div class="chart-item">
                  <canvas id="chart_TreesAndGraphs"></canvas>
                  <p class="chart-label">Trees and Graphs (146)</p>
                </div>
                <!-- Chart 17: Mathematical Notations -->
                <div class="chart-item">
                  <canvas id="chart_MathematicalNotations"></canvas>
                  <p class="chart-label">Mathematical Notations (133)</p>
                </div>
                <!-- Chart 18: Comics and Cartoons -->
                <div class="chart-item">
                  <canvas id="chart_ComicsAndCartoons"></canvas>
                  <p class="chart-label">Comics and Cartoons (131)</p>
                </div>
                <!-- Chart 19: Sculpture -->
                <div class="chart-item">
                  <canvas id="chart_Sculpture"></canvas>
                  <p class="chart-label">Sculpture (117)</p>
                </div>
                <!-- Chart 20: Portraits -->
                <div class="chart-item">
                  <canvas id="chart_Portraits"></canvas>
                  <p class="chart-label">Portraits (91)</p>
                </div>
                <!-- Chart 21: Screenshots -->
                <div class="chart-item">
                  <canvas id="chart_Screenshots"></canvas>
                  <p class="chart-label">Screenshots (70)</p>
                </div>
                <!-- Chart 22: Other -->
                <div class="chart-item">
                  <canvas id="chart_Other"></canvas>
                  <p class="chart-label">Other(60)</p>
                </div>
                <!-- Chart 23: Poster -->
                <div class="chart-item">
                  <canvas id="chart_Poster"></canvas>
                  <p class="chart-label">Poster(57)</p>
                </div>
                <!-- Chart 24: Icons and Symbols -->
                <div class="chart-item">
                  <canvas id="chart_IconsAndSymbols"></canvas>
                  <p class="chart-label">Icons and Symbols (42)</p>
                </div>
                <!-- Chart 25: Historical Timelines -->
                <div class="chart-item">
                  <canvas id="chart_HistoricalTimelines"></canvas>
                  <p class="chart-label">Historical Timelines (30)</p>
                </div>
                <!-- Chart 26: 3D Renderings -->
                <div class="chart-item">
                  <canvas id="chart_3DRenderings"></canvas>
                  <p class="chart-label">3D Renderings (21)</p>
                </div>
                <!-- Chart 27: DNA Sequences -->
                <div class="chart-item">
                  <canvas id="chart_DNASequences"></canvas>
                  <p class="chart-label">DNA Sequences (20)</p>
                </div>
                <!-- Chart 28: Landscapes -->
                <div class="chart-item">
                  <canvas id="chart_Landscapes"></canvas>
                  <p class="chart-label">Landscapes (16)</p>
                </div>
                <!-- Chart 29: Logos and Branding -->
                <div class="chart-item">
                  <canvas id="chart_LogosAndBranding"></canvas>
                  <p class="chart-label">Logos and Branding (14)</p>
                </div>
                <!-- Chart 30: Advertisements -->
                <div class="chart-item">
                  <canvas id="chart_Advertisements"></canvas>
                  <p class="chart-label">Advertisements (10)</p>
                </div>
              </div>
              <p class="bottom-text"> Selected models' performance on 30 different image types. Note that a single image may have multiple image types.</p>
            </div>
          </div>
        </div>
        <!-------------------------------------------------------------------- Difficulty Levels SECTION -------------------------------------------------------------------->
        <div class="columns is-centered m-6">
          <div class="column is-full has-text-centered content">
            <h2 class="title is-3">Different Difficulty Levels</h2>
            <div class="content has-text-justified">
              <p>
                We compare the performance of selected models across three difficulty levels.
                GPT-4V demonstrates a significantly higher proficiency, with a success rate of 76.1%, compared to open-source models in the “Easy” category.
                When it comes to the “Medium” category, while the gap narrows, GPT-4V still leads at 55.6%.
                The further diminishing performance gap in the “Hard” category across models indicates that as the complexity of tasks increases, the advantage of more advanced models like GPT-4V almost disappears.
                This might reflect a current limitation in handling expert-level challenging queries even for the most advanced models.
              </p>
            </div>
            <div class="content has-text-centered">
              <p>Click legend to switch the comparison chart.</p>
            </div>
            <div class="content has-text-centered">
              <canvas id="difficulty_level_chart"></canvas>
              <p>Result decomposition across question difficulty levels.</p>
            </div>
          </div>
        </div>
      <!-------------------------------------------------------------------- Single VS Multiple image SECTION -------------------------------------------------------------------->
        <div class="columns is-centered m-6">
        <div class="column is-full has-text-centered content">
          <h2 class="title is-3">Single Image VS Multiple Image</h2>
          <div class="content has-text-centered">
            <div class="content has-text-centered">
              <p>Click legend to switch the comparison chart.</p>
            </div>
            <canvas id="single_vs_multiple_chart"></canvas>
            <p>Result decomposition across single image and multiple image tasks.</p>
          </div>
        </div>
        </div>
      <!-------------------------------------------------------------------- Error Analysis SECTION -------------------------------------------------------------------->
        <div class="columns is-centered m-6">
          <div class="column is-full has-text-centered content">
            <h2 class="title is-3">Error Analysis</h2>
            <div class="content has-text-justified">
              <p>
                We delve into the analysis of errors by GPT-4V, a pivotal aspect for understanding its operational capabilities and limitations.
                This analysis serves not only to identify the model's current shortcomings but also to guide future enhancements in its design and training.
                We meticulously examine 150 randomly sampled error instances from GPT-4V's predictions.
                These instances are analyzed by expert annotators who identify the root causes of mispredictions based on their knowledge and the golden explanations if available.
                The distribution of these errors is illustrated in the figure below, and a selection of 100 notable cases, along with detailed analyses, is included in the Appendix.
              </p>
            </div>
            <div class="content has-text-centered">
              <img src="static/images/error_distribution_1.Jpeg" alt="error distribution" width="25%">
              <p> Error distribution over 150 annotated GPT-4V errors.</p>
            </div>
          </div>
        </div>
      <!-------------------------------------------------------------------- Error Example  -------------------------------------------------------------------->
        <div class="columns is-centered m-6">
          <div class="column is-full has-text-centered content">
            <h2 class="title is-3" id="examples">Error Examples</h2>
            <!-- One slide per error-case figure under static/images/error/.
                 Each alt text carries the figure number so slides are distinguishable to assistive technology.
                 Slide 14 is commented out. -->
            <div class="carousel results-carousel">
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/1.png" alt="Error example 1" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/2.png" alt="Error example 2" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/3.png" alt="Error example 3" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/4.png" alt="Error example 4" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/5.png" alt="Error example 5" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/6.png" alt="Error example 6" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/7.png" alt="Error example 7" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/8.png" alt="Error example 8" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/9.png" alt="Error example 9" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/10.png" alt="Error example 10" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/11.png" alt="Error example 11" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/12.png" alt="Error example 12" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/13.png" alt="Error example 13" width="60%"/>
                </div>
              </div>
              <!-- <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/14.png" alt="Error example 14" width="60%"/>
                </div>
              </div> -->
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/15.png" alt="Error example 15" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/16.png" alt="Error example 16" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/17.png" alt="Error example 17" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/18.png" alt="Error example 18" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/19.png" alt="Error example 19" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/20.png" alt="Error example 20" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/21.png" alt="Error example 21" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/22.png" alt="Error example 22" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/23.png" alt="Error example 23" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/24.png" alt="Error example 24" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/25.png" alt="Error example 25" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/26.png" alt="Error example 26" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/27.png" alt="Error example 27" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/28.png" alt="Error example 28" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/29.png" alt="Error example 29" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/30.png" alt="Error example 30" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/31.png" alt="Error example 31" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/32.png" alt="Error example 32" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/33.png" alt="Error example 33" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/34.png" alt="Error example 34" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/35.png" alt="Error example 35" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/36.png" alt="Error example 36" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/37.png" alt="Error example 37" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/38.png" alt="Error example 38" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/39.png" alt="Error example 39" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/40.png" alt="Error example 40" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/41.png" alt="Error example 41" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/42.png" alt="Error example 42" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/43.png" alt="Error example 43" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/44.png" alt="Error example 44" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/45.png" alt="Error example 45" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/46.png" alt="Error example 46" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/47.png" alt="Error example 47" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/48.png" alt="Error example 48" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/49.png" alt="Error example 49" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/error/50.png" alt="Error example 50" width="60%"/>
                </div>
              </div>
            </div>
          </div>
        </div>
      <!-------------------------------------------------------------------- Correct Example -------------------------------------------------------------------->
        <div class="columns is-centered m-6">
          <div class="column is-full has-text-centered content">
            <h2 class="title is-3">Correct Examples</h2>
            <!-- One slide per correct-case figure under static/images/correct/.
                 Each alt text carries the figure number so slides are distinguishable to assistive technology.
                 Slide 13 is commented out. -->
            <div id="results-carousel" class="carousel results-carousel">
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/1.png" alt="Correct example 1" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/2.png" alt="Correct example 2" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/3.png" alt="Correct example 3" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/4.png" alt="Correct example 4" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/5.png" alt="Correct example 5" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/6.png" alt="Correct example 6" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/7.png" alt="Correct example 7" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/8.png" alt="Correct example 8" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/9.png" alt="Correct example 9" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/10.png" alt="Correct example 10" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/11.png" alt="Correct example 11" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/12.png" alt="Correct example 12" width="60%"/>
                </div>
              </div>
              <!-- <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/13.png" alt="Correct example 13" width="60%"/>
                </div>
              </div> -->
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/14.png" alt="Correct example 14" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/15.png" alt="Correct example 15" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/16.png" alt="Correct example 16" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/17.png" alt="Correct example 17" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/18.png" alt="Correct example 18" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/19.png" alt="Correct example 19" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/20.png" alt="Correct example 20" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/21.png" alt="Correct example 21" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/22.png" alt="Correct example 22" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/23.png" alt="Correct example 23" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/24.png" alt="Correct example 24" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/25.png" alt="Correct example 25" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/26.png" alt="Correct example 26" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/27.png" alt="Correct example 27" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/28.png" alt="Correct example 28" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/29.png" alt="Correct example 29" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/30.png" alt="Correct example 30" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/31.png" alt="Correct example 31" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/32.png" alt="Correct example 32" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/33.png" alt="Correct example 33" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/34.png" alt="Correct example 34" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/35.png" alt="Correct example 35" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/36.png" alt="Correct example 36" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/37.png" alt="Correct example 37" width="60%"/>
                </div>
              </div>
              <div class="box m-5">
                <div class="content has-text-centered">
                  <img src="static/images/correct/38.png" alt="Correct example 38" width="60%"/>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </section>

    <!-- @PAN TODO: bibtex -->
    <section class="section" id="BibTeX">
      <div class="container is-max-desktop content">
        <h2 class="title is-3 has-text-centered">BibTeX</h2>
        <!-- Citation entry for copy/paste. Whitespace inside the pre element is rendered and copied
             verbatim, so the entry starts right after the code tag and is kept at column 0. -->
        <pre><code>@inproceedings{yue2023mmmu,
  title={MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI},
  author={Xiang Yue and Yuansheng Ni and Kai Zhang and Tianyu Zheng and Ruoqi Liu and Ge Zhang and Samuel Stevens and Dongfu Jiang and Weiming Ren and Yuxuan Sun and Cong Wei and Botao Yu and Ruibin Yuan and Renliang Sun and Ming Yin and Boyuan Zheng and Zhenzhu Yang and Yibo Liu and Wenhao Huang and Huan Sun and Yu Su and Wenhu Chen},
  booktitle={Proceedings of CVPR},
  year={2024},
}</code></pre>
      </div>
    </section>

    <!-- Page footer: template attribution and content licence. -->
    <footer class="footer">
      <div class="columns is-centered">
        <div class="column is-8">
          <div class="content">
            <p>
              This website is adapted from <a href="https://nerfies.github.io/">Nerfies</a> and <a href="https://mathvista.github.io/">MathVista</a>, licensed under a
              <a rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/">Creative Commons Attribution-ShareAlike 4.0 International License</a>.
            </p>
          </div>
        </div>
      </div>
    </footer>

  </body>
</html>