<!-- index.html -->

<!DOCTYPE html>
<!-- lang declares the language of the page text (English) for screen readers, translation tools and search engines. -->
<html lang="en">
<head>
  <meta charset="utf-8">

  <!-- Social-media preview metadata (Open Graph and Twitter Card). These tags are the page's
       "business card" whenever a link to it is shared, so keep them in sync with the paper. -->
  <meta name="description" content="Stealthy Imitation is the first model theft attack against deep reinforcement learning policies that does not require access to the environment.">
  <meta property="og:title" content="Stealthy Imitation: Reward-guided Environment-free Policy Stealing">
  <meta property="og:description" content="Stealthy Imitation is the first model theft attack against deep reinforcement learning policies that does not require access to the environment.">
  <meta property="og:url" content="https://zhixiongzh.github.io/stealthy-imitation/">
  <!-- Banner image for link previews; optimal dimensions are 1200x630.
       Link-preview crawlers generally expect an absolute URL, so it is built from the og:url above,
       and it points at static/images/ like every other image referenced by this page.
       NOTE(review): confirm banner_image.png is actually deployed at this location. -->
  <meta property="og:image" content="https://zhixiongzh.github.io/stealthy-imitation/static/images/banner_image.png">
  <meta property="og:image:width" content="1200">
  <meta property="og:image:height" content="630">

  <meta name="twitter:title" content="Stealthy Imitation: Reward-guided Environment-free Policy Stealing">
  <meta name="twitter:description" content="Stealthy Imitation is the first model theft attack against deep reinforcement learning policies that does not require access to the environment.">
  <!-- Same banner image as og:image; optimal dimensions for the Twitter card are 1200x600. -->
  <meta name="twitter:image" content="https://zhixiongzh.github.io/stealthy-imitation/static/images/banner_image.png">
  <meta name="twitter:card" content="summary_large_image">

  <!-- Keywords for the paper to be indexed by -->
  <meta name="keywords" content="model stealing, security, deep reinforcement learning, control system">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <title>Stealthy Imitation: Reward-guided Environment-free Policy Stealing</title>
  <link rel="icon" type="image/png" href="static/images/favicon.png">

  <!-- Web fonts. The family separator "|" is percent-encoded (%7C) because a raw "|" is not a valid URL character. -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Google+Sans%7CNoto+Sans%7CCastoro">

  <!-- Stylesheets: Bulma and its carousel/slider extensions, icon fonts, then the page's own rules last. -->
  <link rel="stylesheet" href="static/css/bulma.min.css">
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="static/css/index.css">

  <!-- Scripts: CDN libraries first, then the local widgets and page logic. Load order is kept as-is
       because static/js/index.js may depend on the libraries before it.
       NOTE(review): no markup in this file appears to use the Adobe View SDK; confirm whether
       static/js/index.js needs it before removing the request. -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>
<body>


  <!-- Title block: paper title, authors, affiliations, and links to the paper PDF, code and arXiv page -->
  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <h1 class="title is-1 publication-title">Stealthy Imitation: Reward-guided Environment-free Policy Stealing</h1>

            <!-- Paper authors; the superscripts refer to the affiliations listed below.
                 The first author has no link target, so the anchor is an href-less placeholder
                 (target is only valid together with href, hence it is not set there). -->
            <div class="is-size-5 publication-authors">
              <span class="author-block">
                <a>Zhixiong Zhuang</a><sup>1,2</sup>,</span>
              <span class="author-block">
                <a href="https://ririnicolae.github.io/" target="_blank" rel="noopener">Maria-Irina Nicolae</a><sup>2</sup>,</span>
              <span class="author-block">
                <a href="https://cispa.saarland/group/fritz/" target="_blank" rel="noopener">Mario Fritz</a><sup>3</sup>
              </span>
            </div>

            <!-- Affiliations -->
            <div class="is-size-5 publication-authors">
              <span class="author-block"><sup>1</sup>Saarland University,</span>
              <span class="author-block"><sup>2</sup>Bosch Center for AI,</span>
              <span class="author-block"><sup>3</sup>CISPA Helmholtz Center for Information Security</span>
            </div>

            <!-- Resource buttons. Each opens in a new tab; rel="noopener" keeps the opened page from
                 reaching back into this one. Icons are decorative (the visible label names the link),
                 so they are hidden from assistive technology. -->
            <div class="column has-text-centered">
              <div class="publication-links">
                <!-- arXiv PDF link -->
                <span class="link-block">
                  <a class="external-link button is-normal is-rounded is-dark" href="https://arxiv.org/pdf/2405.07004.pdf" target="_blank" rel="noopener">
                    <span class="icon">
                      <i class="fas fa-file-pdf" aria-hidden="true"></i>
                    </span>
                    <span>Paper</span>
                  </a>
                </span>

                <!-- GitHub link -->
                <span class="link-block">
                  <a class="external-link button is-normal is-rounded is-dark" href="https://github.com/boschresearch/stealthy-imitation" target="_blank" rel="noopener">
                    <span class="icon">
                      <i class="fab fa-github" aria-hidden="true"></i>
                    </span>
                    <span>Code</span>
                  </a>
                </span>

                <!-- arXiv abstract link -->
                <span class="link-block">
                  <a class="external-link button is-normal is-rounded is-dark" href="https://arxiv.org/abs/2405.07004" target="_blank" rel="noopener">
                    <span class="icon">
                      <i class="ai ai-arxiv" aria-hidden="true"></i>
                    </span>
                    <span>arXiv</span>
                  </a>
                </span>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>

<!-- Overview figure with its caption -->
<section class="hero is-small">
  <div class="hero-body">
    <div class="container" style="text-align: center;">
      <!-- NOTE(review): the alt text is derived from the caption below; confirm it matches what the figure shows. -->
      <img src="static/images/blind.png" alt="Overview figure: traditional data-free model extraction versus Stealthy Imitation in a control system" style="width:65%; height:auto;">
      <h2 class="subtitle has-text-centered">
        Traditional data-free model extraction fails in control systems due to the unknown environment with varying sensors. Stealthy Imitation effectively extracts policies by stealing the environment first.
      </h2>
    </div>
  </div>
</section>


<!-- Abstract: the paper's abstract, shown as one justified paragraph on a light background -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <!-- One sentence per source line; line breaks collapse to single spaces when rendered. -->
          <p>
            Deep reinforcement learning policies, which are integral to modern control systems, represent valuable intellectual property.
            The development of these policies demands considerable resources, such as domain expertise, simulation fidelity, and real-world validation.
            These policies are potentially vulnerable to model stealing attacks, which aim to replicate their functionality using only black-box access.
            In this paper, we propose Stealthy Imitation, the first attack designed to steal policies without access to the environment or knowledge of the input range.
            This setup has not been considered by previous model stealing methods.
            Lacking access to the victim's input states distribution, Stealthy Imitation fits a reward model that allows to approximate it.
            We show that the victim policy is harder to imitate when the distribution of the attack queries matches that of the victim.
            We evaluate our approach across diverse, high-dimensional control tasks and consistently outperform prior data-free approaches adapted for policy stealing.
            Lastly, we propose a countermeasure that significantly diminishes the effectiveness of the attack.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End abstract -->


<!-- Teaser video with its caption -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body" style="display: flex; flex-direction: column; justify-content: center; align-items: center;">
      <!-- The clip autoplays muted and loops. playsinline lets it play inside the page on iPhone Safari
           instead of switching to fullscreen. There is no poster image, so no poster attribute is set
           (when present, poster must be a non-empty URL). -->
      <video id="tree" autoplay controls muted loop playsinline style="width:70%; height:auto;">
        <source src="static/videos/banner_video.mp4" type="video/mp4">
      </video>
      <h2 class="subtitle has-text-centered">
        A comparison of the extracted policy between our method and DFME.
      </h2>
    </div>
  </div>
</section>
<!-- End teaser video -->


<!-- Paper poster -->
<!-- <section class="hero is-small is-light">
  <div class="hero-body">
    <div class="container">
      <h2 class="title">Poster</h2>

      <iframe  src="static/pdfs/sample.pdf" width="100%" height="550">
          </iframe>
        
      </div>
    </div>
  </section> -->
<!--End paper poster -->


<!-- BibTeX citation -->
<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <!-- Whitespace inside <pre> is rendered literally, so the entry is kept flush-left with
         two-space field indents instead of following the HTML nesting depth. -->
    <pre><code>@inproceedings{zhuang2024stealthy,
  title={Stealthy Imitation: Reward-guided Environment-free Policy Stealing},
  author={Zhuang, Zhixiong and Nicolae, Maria-Irina and Fritz, Mario},
  booktitle={International Conference on Machine Learning (ICML)},
  year={2024}
}</code></pre>
  </div>
</section>
<!-- End BibTeX citation -->
  <!-- Page footer: attribution for the page template and the license of the website.
       External links open in a new tab; rel="noopener" keeps the opened page from reaching back into this one. -->
  <footer class="footer">
    <div class="container">
      <div class="columns is-centered">
        <div class="column is-8">
          <div class="content">

            <p>
              This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank" rel="noopener">Academic Project Page Template</a> which was adapted from the <a href="https://nerfies.github.io" target="_blank" rel="noopener">Nerfies</a> project page.
              You are free to borrow the source code of this website, we just ask that you link back to this page in the footer. <br> This website is licensed under a <a rel="license noopener" href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative
              Commons Attribution-ShareAlike 4.0 International License</a>.
            </p>

          </div>
        </div>
      </div>
    </div>
  </footer>

<!-- Statcounter tracking code -->
  
<!-- You can add a tracker to track page visits by creating an account at statcounter.com -->

    <!-- End of Statcounter Code -->

  </body>
  </html>