diff --git a/docs/sphinx/api/languages/cpp_api.rst b/docs/sphinx/api/languages/cpp_api.rst
index dc80c6623d..1a26fb2d41 100644
--- a/docs/sphinx/api/languages/cpp_api.rst
+++ b/docs/sphinx/api/languages/cpp_api.rst
@@ -44,6 +44,8 @@ Common
 .. doxygenclass:: cudaq::async_result
     :members:
 
+.. doxygentypedef:: async_sample_result
+
 .. doxygenstruct:: cudaq::ExecutionResult
     :members:
 
@@ -168,7 +170,9 @@ Platform
 
 .. doxygenclass:: cudaq::BaseRemoteSimulatorQPU
 
-.. doxygenclass:: cudaq::BaseNvcfSimulatorQPU
+.. doxygenclass:: cudaq::BaseNvcfSimulatorQPU
+
+.. doxygenclass:: cudaq::OrcaRemoteRESTQPU
 
 .. doxygenclass:: cudaq::quantum_platform
     :members:
@@ -231,5 +235,7 @@ Namespaces
 .. doxygennamespace:: cudaq::orca
     :desc-only:
 
-.. doxygenfunction:: cudaq::orca::sample(std::vector<std::size_t> &input_state, std::vector<std::size_t> &loop_lengths, std::vector<double> &bs_angles, int n_samples = 10000)
-.. doxygenfunction:: cudaq::orca::sample(std::vector<std::size_t> &input_state, std::vector<std::size_t> &loop_lengths, std::vector<double> &bs_angles, std::vector<double> &ps_angles, int n_samples = 10000)
+.. doxygenfunction:: cudaq::orca::sample(std::vector<std::size_t> &input_state, std::vector<std::size_t> &loop_lengths, std::vector<double> &bs_angles, int n_samples = 10000, std::size_t qpu_id = 0)
+.. doxygenfunction:: cudaq::orca::sample(std::vector<std::size_t> &input_state, std::vector<std::size_t> &loop_lengths, std::vector<double> &bs_angles, std::vector<double> &ps_angles, int n_samples = 10000, std::size_t qpu_id = 0)
+.. doxygenfunction:: cudaq::orca::sample_async(std::vector<std::size_t> &input_state, std::vector<std::size_t> &loop_lengths, std::vector<double> &bs_angles, int n_samples = 10000, std::size_t qpu_id = 0)
+.. doxygenfunction:: cudaq::orca::sample_async(std::vector<std::size_t> &input_state, std::vector<std::size_t> &loop_lengths, std::vector<double> &bs_angles, std::vector<double> &ps_angles, int n_samples = 10000, std::size_t qpu_id = 0)
diff --git a/docs/sphinx/examples/cpp/providers/orca.cpp b/docs/sphinx/examples/cpp/providers/orca.cpp
index a23f7aa49e..9df9e79f81 100644
--- a/docs/sphinx/examples/cpp/providers/orca.cpp
+++ b/docs/sphinx/examples/cpp/providers/orca.cpp
@@ -8,6 +8,12 @@
 #include "cudaq/orca.h"
 #include "cudaq.h"
 
+#include <chrono>
+#include <thread>
+
+#include <fstream>
+#include <iostream>
+
 // define helper function to generate linear spaced vectors
 template <typename T>
 void linear_spaced_vector(std::vector<T> &xs, T min, T max, std::size_t N) {
@@ -20,6 +26,8 @@ void linear_spaced_vector(std::vector<T> &xs, T min, T max, std::size_t N) {
 }
 
 int main() {
+  using namespace std::this_thread;     // sleep_for, sleep_until
+  using namespace std::chrono_literals; // `ns`, `us`, `ms`, `s`, `h`, etc.
 
   // A time-bin boson sampling experiment: An input state of 4 indistinguishable
   // photons mixed with 4 vacuum states across 8 time bins (modes) enter the
@@ -60,11 +68,15 @@ int main() {
   // we can also set number of requested samples
   int n_samples{10000};
 
-  // Submit to ORCA synchronously (e.g., wait for the job result to be returned
-  // before proceeding with the rest of the execution).
+  // Submit to ORCA synchronously (i.e., wait for the job result to be
+  // returned before proceeding with the rest of the execution).
+  std::cout << "Submitting to ORCA Server synchronously" << std::endl;
   auto counts =
       cudaq::orca::sample(input_state, loop_lengths, bs_angles, n_samples);
 
+  // Print the results
+  counts.dump();
+
   // If the system includes phase shifters, the phase shifter angles can be
   // included in the call
 
@@ -73,8 +85,27 @@ int main() {
   //     ps_angles, n_samples);
   // ```
 
-  // Print the results
-  counts.dump();
+  // Alternatively, we can submit to ORCA asynchronously (i.e., continue
+  // executing code in the file while the job is being processed).
+  std::cout << "Submitting to ORCA Server asynchronously" << std::endl;
+  auto async_results = cudaq::orca::sample_async(input_state, loop_lengths,
+                                                 bs_angles, n_samples);
+
+  // Can write the future to file:
+  {
+    std::ofstream out("saveMe.json");
+    out << async_results;
+  }
+
+  // Then come back and read it in later.
+  cudaq::async_result<cudaq::sample_result> readIn;
+  std::ifstream in("saveMe.json");
+  in >> readIn;
+
+  sleep_for(200ms); // wait for the job to be processed
+  // Get the results of the read-in future.
+  auto async_counts = readIn.get();
+  async_counts.dump();
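+
+  // We can also skip the file round-trip entirely and retrieve the results
+  // directly from the handle; `get()` blocks until the job completes:
+  // ```
+  // auto async_counts = async_results.get();
+  // async_counts.dump();
+  // ```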
 
   return 0;
 }
\ No newline at end of file
diff --git a/docs/sphinx/examples/python/providers/orca.py b/docs/sphinx/examples/python/providers/orca.py
index 5f91aad305..53cadb09d5 100644
--- a/docs/sphinx/examples/python/providers/orca.py
+++ b/docs/sphinx/examples/python/providers/orca.py
@@ -1,4 +1,5 @@
 import cudaq
+import time
 import numpy as np
 
 import os
@@ -45,9 +46,11 @@
 # we can also set number of requested samples
 n_samples = 10000
 
+# Option A:
 # By using the synchronous `cudaq.orca.sample`, the execution of
 # any remaining classical code in the file will occur only
 # after the job has been returned from ORCA Server.
+print("Submitting to ORCA Server synchronously")
 counts = cudaq.orca.sample(input_state, loop_lengths, bs_angles, n_samples)
 
 # If the system includes phase shifters, the phase shifter angles can be
@@ -59,3 +62,32 @@
 
 # Print the results
 print(counts)
+
+# Option B:
+# By using the asynchronous `cudaq.orca.sample_async`, the remaining
+# classical code will be executed while the job is being handled
+# by ORCA. This is ideal when submitting via a queue over
+# the cloud.
+print("Submitting to ORCA Server asynchronously")
+async_results = cudaq.orca.sample_async(input_state, loop_lengths, bs_angles,
+                                        n_samples)
+# ... more classical code to run ...
+
+# We can either retrieve the results later in the program with
+# ```
+# async_counts = async_results.get()
+# ```
+# or write the job reference (`async_results`) to
+# a file and load it later or from a different process.
+file = open("future.txt", "w")
+file.write(str(async_results))
+file.close()
+
+# We can later read the file content and retrieve the job
+# information and results.
+time.sleep(0.2)  # wait for the job to be processed
+same_file = open("future.txt", "r")
+retrieved_async_results = cudaq.AsyncSampleResult(str(same_file.read()))
+
+counts = retrieved_async_results.get()
+print(counts)
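+
+# Both overloads also accept an optional `qpu_id` argument (default 0),
+# which selects the QPU to submit to on platforms that expose more than
+# one. A sketch:
+# ```
+# counts = cudaq.orca.sample(input_state, loop_lengths, bs_angles,
+#                            n_samples, qpu_id=0)
+# ```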
diff --git a/docs/sphinx/using/backends/hardware.rst b/docs/sphinx/using/backends/hardware.rst
index 203e0268b1..0dbb53a3d8 100644
--- a/docs/sphinx/using/backends/hardware.rst
+++ b/docs/sphinx/using/backends/hardware.rst
@@ -312,6 +312,16 @@ configuration.
 
    export ORCA_ACCESS_URL="https://"
 
+
+Sometimes the requests to the PT-1 require an authentication token. This token can be set as an
+environment variable named ``ORCA_AUTH_TOKEN``. For example, if the token is :code:`AbCdEf123456`,
+you can set the environment variable as follows:
+
+.. code:: bash
+
+    export ORCA_AUTH_TOKEN="AbCdEf123456"
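+
+Both variables are read at run time, so export them in the shell (or
+environment) from which the compiled program or Python script is launched.
+For example (the URL shown is the server helper's default and is only
+illustrative):
+
+.. code:: bash
+
+    export ORCA_ACCESS_URL="http://localhost:8080"
+    export ORCA_AUTH_TOKEN="AbCdEf123456"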
+
+
 Submission from C++
 `````````````````````````
 
diff --git a/python/extension/CMakeLists.txt b/python/extension/CMakeLists.txt
index 60b8d0d4db..de6ba323bd 100644
--- a/python/extension/CMakeLists.txt
+++ b/python/extension/CMakeLists.txt
@@ -70,10 +70,13 @@ declare_mlir_python_extension(CUDAQuantumPythonSources.Extension
     ../runtime/utils/PyRemoteSimulatorQPU.cpp
     ../runtime/utils/PyRestRemoteClient.cpp
     ../utils/LinkedLibraryHolder.cpp
+    ../../runtime/common/ArgumentConversion.cpp
     ../../runtime/cudaq/platform/common/QuantumExecutionQueue.cpp
     ../../runtime/cudaq/platform/default/rest_server/RemoteRuntimeClient.cpp
+    ../../runtime/cudaq/platform/orca/OrcaExecutor.cpp
     ../../runtime/cudaq/platform/orca/OrcaQPU.cpp
-    ../../runtime/common/ArgumentConversion.cpp
+    ../../runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp
+    ../../runtime/cudaq/platform/orca/OrcaServerHelper.cpp
 
   EMBED_CAPI_LINK_LIBS
     CUDAQuantumMLIRCAPI
diff --git a/python/extension/CUDAQuantumExtension.cpp b/python/extension/CUDAQuantumExtension.cpp
index a0a23b71bc..d8cbfb81aa 100644
--- a/python/extension/CUDAQuantumExtension.cpp
+++ b/python/extension/CUDAQuantumExtension.cpp
@@ -166,20 +166,41 @@ PYBIND11_MODULE(_quakeDialects, m) {
   orcaSubmodule.def(
       "sample",
       py::overload_cast<std::vector<std::size_t> &, std::vector<std::size_t> &,
-                        std::vector<double> &, std::vector<double> &, int>(
-          &cudaq::orca::sample),
+                        std::vector<double> &, std::vector<double> &, int,
+                        std::size_t>(&cudaq::orca::sample),
       "Performs Time Bin Interferometer (TBI) boson sampling experiments on "
       "ORCA's backends",
       py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"),
-      py::arg("ps_angles") = nullptr, py::arg("n_samples") = 10000);
+      py::arg("ps_angles"), py::arg("n_samples") = 10000,
+      py::arg("qpu_id") = 0);
   orcaSubmodule.def(
       "sample",
       py::overload_cast<std::vector<std::size_t> &, std::vector<std::size_t> &,
-                        std::vector<double> &, int>(&cudaq::orca::sample),
+                        std::vector<double> &, int, std::size_t>(
+          &cudaq::orca::sample),
+      "Performs Time Bin Interferometer (TBI) boson sampling experiments on "
+      "ORCA's backends",
+      py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"),
+      py::arg("n_samples") = 10000, py::arg("qpu_id") = 0);
+  orcaSubmodule.def(
+      "sample_async",
+      py::overload_cast<std::vector<std::size_t> &, std::vector<std::size_t> &,
+                        std::vector<double> &, std::vector<double> &, int,
+                        std::size_t>(&cudaq::orca::sample_async),
       "Performs Time Bin Interferometer (TBI) boson sampling experiments on "
       "ORCA's backends",
       py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"),
-      py::arg("n_samples") = 10000);
+      py::arg("ps_angles"), py::arg("n_samples") = 10000,
+      py::arg("qpu_id") = 0);
+  orcaSubmodule.def(
+      "sample_async",
+      py::overload_cast<std::vector<std::size_t> &, std::vector<std::size_t> &,
+                        std::vector<double> &, int, std::size_t>(
+          &cudaq::orca::sample_async),
+      "Performs Time Bin Interferometer (TBI) boson sampling experiments on "
+      "ORCA's backends",
+      py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"),
+      py::arg("n_samples") = 10000, py::arg("qpu_id") = 0);
 
   auto photonicsSubmodule = cudaqRuntime.def_submodule("photonics");
   photonicsSubmodule.def(
@@ -217,7 +238,6 @@ PYBIND11_MODULE(_quakeDialects, m) {
         cudaq::getExecutionManager()->returnQudit(cudaq::QuditInfo(level, id));
       },
       "Release a qudit of given id.", py::arg("level"), py::arg("id"));
-
   cudaqRuntime.def("cloneModule",
                    [](MlirModule mod) { return wrap(unwrap(mod).clone()); });
   cudaqRuntime.def("isTerminator", [](MlirOperation op) {
diff --git a/runtime/cudaq/platform/orca/CMakeLists.txt b/runtime/cudaq/platform/orca/CMakeLists.txt
index 779a2cf794..3610b902a3 100644
--- a/runtime/cudaq/platform/orca/CMakeLists.txt
+++ b/runtime/cudaq/platform/orca/CMakeLists.txt
@@ -8,8 +8,14 @@
 set(LIBRARY_NAME cudaq-orca-qpu)
 message(STATUS "Building ORCA REST QPU.")
 
+set(ORCA_SRC
+  OrcaExecutor.cpp
+  OrcaQPU.cpp
+  OrcaRemoteRESTQPU.cpp
+  OrcaServerHelper.cpp
+)
 
-add_library(${LIBRARY_NAME} SHARED OrcaQPU.cpp)
+add_library(${LIBRARY_NAME} SHARED ${ORCA_SRC})
 
 target_include_directories(${LIBRARY_NAME}
   PRIVATE .
   PUBLIC
@@ -30,9 +36,4 @@ target_link_libraries(${LIBRARY_NAME}
 install(TARGETS ${LIBRARY_NAME} DESTINATION lib)
 install(TARGETS ${LIBRARY_NAME} EXPORT cudaq-orca-qpu-targets DESTINATION lib)
 
-# install(EXPORT cudaq-orca-qpu-targets
-#         FILE CUDAQQPUOrcaTargets.cmake
-#         NAMESPACE cudaq::orca::
-#         DESTINATION lib/cmake/cudaq)
-
-add_target_config(orca)
\ No newline at end of file
+add_target_config(orca)
diff --git a/runtime/cudaq/platform/orca/OrcaExecutor.cpp b/runtime/cudaq/platform/orca/OrcaExecutor.cpp
new file mode 100644
index 0000000000..94413a24c5
--- /dev/null
+++ b/runtime/cudaq/platform/orca/OrcaExecutor.cpp
@@ -0,0 +1,47 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.   *
+ ******************************************************************************/
+
+#include "OrcaExecutor.h"
+#include "OrcaServerHelper.h"
+#include "common/Logger.h"
+
+namespace cudaq {
+
+details::future OrcaExecutor::execute(cudaq::orca::TBIParameters params,
+                                      const std::string &kernelName) {
+  auto orcaServerHelper = dynamic_cast<OrcaServerHelper *>(serverHelper);
+  assert(orcaServerHelper);
+  orcaServerHelper->setShots(shots);
+  cudaq::info("Executor creating job to execute with the {} helper.",
+              orcaServerHelper->name());
+  // Create the Job Payload, composed of job post path, headers,
+  // and the job json messages themselves
+  auto [jobPostPath, headers, jobs] = orcaServerHelper->createJob(params);
+  auto job = jobs[0];
+  auto config = orcaServerHelper->getConfig();
+  std::vector<cudaq::details::future::Job> ids;
+  cudaq::info("Job created, posting to {}", jobPostPath);
+  // Post it, get the response
+  auto response = client.post(jobPostPath, "", job, headers);
+  cudaq::info("Job posted, response was {}", response.dump());
+  // Add the job id and the job name.
+  auto job_id = orcaServerHelper->extractJobId(response);
+  if (job_id.empty()) {
+    nlohmann::json tmp(job.at("job_id"));
+    orcaServerHelper->constructGetJobPath(tmp[0]);
+    job_id = tmp[0].at("job_id");
+  }
+  ids.emplace_back(job_id, kernelName);
+  config["output_names." + job_id] = kernelName;
+
+  config.insert({"shots", std::to_string(shots)});
+  std::string name = orcaServerHelper->name();
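+  // The future carries only the job id(s), the helper name, and the config
+  // map, so a later `get()` can recreate the "orca" server helper from the
+  // registry and poll for results -- even from a different process.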
+  return cudaq::details::future(ids, name, config);
+}
+
+} // namespace cudaq
\ No newline at end of file
diff --git a/runtime/cudaq/platform/orca/OrcaExecutor.h b/runtime/cudaq/platform/orca/OrcaExecutor.h
new file mode 100644
index 0000000000..11f0dd76ad
--- /dev/null
+++ b/runtime/cudaq/platform/orca/OrcaExecutor.h
@@ -0,0 +1,26 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.   *
+ ******************************************************************************/
+
+#pragma once
+
+#include "common/Executor.h"
+#include "orca_qpu.h"
+
+namespace cudaq {
+
+/// @brief The Executor subclass for the ORCA target, which has a distinct
+/// sampling API.
+class OrcaExecutor : public Executor {
+public:
+  /// @brief Execute the provided ORCA quantum parameters and return a future
+  /// object. The caller can make this synchronous by just immediately calling
+  /// .get().
+  details::future execute(cudaq::orca::TBIParameters params,
+                          const std::string &kernelName);
+};
+} // namespace cudaq
diff --git a/runtime/cudaq/platform/orca/OrcaQPU.cpp b/runtime/cudaq/platform/orca/OrcaQPU.cpp
index e16df275a1..63883a7af3 100644
--- a/runtime/cudaq/platform/orca/OrcaQPU.cpp
+++ b/runtime/cudaq/platform/orca/OrcaQPU.cpp
@@ -7,324 +7,90 @@
  * the terms of the Apache License 2.0 which accompanies this distribution.   *
  ******************************************************************************/
 
-#include "common/ExecutionContext.h"
-#include "common/FmtCore.h"
-
-#include "common/Logger.h"
-#include "common/RestClient.h"
-#include "common/ServerHelper.h"
-#include "cudaq.h"
-#include "nvqpp_config.h"
-
-#include "cudaq/platform/qpu.h"
-#include "cudaq/platform/quantum_platform.h"
-#include "cudaq/qis/qubit_qis.h"
-#include "cudaq/spin_op.h"
+// #include "common/ExecutionContext.h"
+// #include "common/Future.h"
+#include "cudaq/platform.h"
 #include "orca_qpu.h"
 
-#include "llvm/Support/Base64.h"
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace cudaq::orca {
-cudaq::sample_result sample(std::vector<std::size_t> &input_state,
-                            std::vector<std::size_t> &loop_lengths,
-                            std::vector<double> &bs_angles,
-                            std::vector<double> &ps_angles, int n_samples) {
-  TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles,
-                           n_samples};
-  cudaq::ExecutionContext context("sample", n_samples);
-  auto &platform = get_platform();
-  platform.set_exec_ctx(&context, 0);
-  cudaq::altLaunchKernel("orca_launch", nullptr, &parameters,
-                         sizeof(TBIParameters), 0);
-  return context.result;
-}
-cudaq::sample_result sample(std::vector<std::size_t> &input_state,
-                            std::vector<std::size_t> &loop_lengths,
-                            std::vector<double> &bs_angles, int n_samples) {
-  std::vector<double> ps_angles = {};
-  TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles,
-                           n_samples};
-  cudaq::ExecutionContext context("sample", n_samples);
-  auto &platform = get_platform();
-  platform.set_exec_ctx(&context, 0);
-  cudaq::altLaunchKernel("orca_launch", nullptr, &parameters,
-                         sizeof(TBIParameters), 0);
-  return context.result;
-}
-} // namespace cudaq::orca
-
-namespace {
-
-/// @brief The OrcaRemoteRESTQPU is a subtype of QPU that enables the
-/// execution of CUDA-Q kernels on remotely hosted quantum computing
-/// services via a REST Client / Server interaction. This type is meant
-/// to be general enough to support any remotely hosted service.
-/// Moreover, this QPU handles launching kernels under the Execution Context
-/// that includs sampling via synchronous client invocations.
-class OrcaRemoteRESTQPU : public cudaq::QPU {
-protected:
-  /// The number of shots
-  std::optional<int> nShots;
-
-  /// @brief the platform file path, CUDAQ_INSTALL/platforms
-  std::filesystem::path platformPath;
-
-  /// @brief The name of the QPU being targeted
-  std::string qpuName;
-
-  /// @brief The base URL
-  std::string baseUrl;
-
-  /// @brief The machine we are targeting
-  std::string machine = "PT-1";
-
-  /// @brief Mapping of general key-values for backend
-  /// configuration.
-  std::map<std::string, std::string> backendConfig;
-
-  /// @brief Flag indicating whether we should emulate
-  /// execution locally.
-  bool emulate = false;
-
-private:
-  /// @brief RestClient used for HTTP requests.
-  cudaq::RestClient client;
-
-public:
-  /// @brief The constructor
-  OrcaRemoteRESTQPU() : QPU() {
-    std::filesystem::path cudaqLibPath{cudaq::getCUDAQLibraryPath()};
-    platformPath = cudaqLibPath.parent_path().parent_path() / "targets";
-  }
-
-  OrcaRemoteRESTQPU(OrcaRemoteRESTQPU &&) = delete;
-
-  /// @brief The destructor
-  virtual ~OrcaRemoteRESTQPU() = default;
-
-  /// Enqueue a quantum task on the asynchronous execution queue.
-  void enqueue(cudaq::QuantumTask &task) override {
-    execution_queue->enqueue(task);
-  }
-
-  /// @brief Return true if the current backend is a simulator
-  bool isSimulator() override { return emulate; }
-
-  /// @brief Return true if the current backend supports conditional feedback
-  bool supportsConditionalFeedback() override { return false; }
-
-  /// Provide the number of shots
-  void setShots(int _nShots) override { nShots = _nShots; }
-
-  /// Clear the number of shots
-  void clearShots() override { nShots = std::nullopt; }
-
-  /// @brief Return true if the current backend is remote
-  virtual bool isRemote() override { return !emulate; }
-
-  /// Store the execution context for launchKernel
-  void setExecutionContext(cudaq::ExecutionContext *context) override {
-    if (!context)
-      return;
-
-    cudaq::info("Remote Rest QPU setting execution context to {}",
-                context->name);
-
-    // Execution context is valid
-    executionContext = context;
-  }
-
-  /// Reset the execution context
-  void resetExecutionContext() override {
-    // do nothing here
-    executionContext = nullptr;
-  }
-
-  /// @brief This setTargetBackend override is in charge of reading the
-  /// specific target backend configuration file.
-  void setTargetBackend(const std::string &backend) override;
-
-  /// @brief Creates a quantum computation job using the provided kernel
-  /// executions and returns the corresponding payload.
-  cudaq::ServerJobPayload createJob(cudaq::orca::TBIParameters params);
-
-  /// @brief Given a completed job response, map back to the sample_result
-  cudaq::sample_result processResults(cudaq::ServerMessage &postJobResponse);
-
-  /// @brief Returns the name of the server helper.
-  const std::string name() const { return "orca"; }
-
-  /// @brief Returns the headers for the server requests.
-  cudaq::RestHeaders getHeaders();
-
-  /// @brief Initializes the server helper with the provided backend
-  /// configuration.
-  void initialize();
-
-  /// @brief Launch the kernel. Handle all pertinent
-  /// modifications for the execution context.
-  void launchKernel(const std::string &kernelName, void (*kernelFunc)(void *),
-                    void *args, std::uint64_t voidStarSize,
-                    std::uint64_t resultOffset,
-                    const std::vector<void *> &rawArgs) override;
-  void launchKernel(const std::string &kernelName,
-                    const std::vector<void *> &rawArgs) override {
-    throw std::runtime_error("launch kernel on raw args not implemented");
-  }
-};
-
-/// @brief This setTargetBackend override is in charge of reading the
-/// specific target backend configuration file.
-void OrcaRemoteRESTQPU::setTargetBackend(const std::string &backend) {
-  cudaq::info("Remote REST platform is targeting {}.", backend);
-
-  // First we see if the given backend has extra config params
-  auto mutableBackend = backend;
-  if (mutableBackend.find(";") != std::string::npos) {
-    auto split = cudaq::split(mutableBackend, ';');
-    mutableBackend = split[0];
-    // Must be key-value pairs, therefore an even number of values here
-    if ((split.size() - 1) % 2 != 0)
-      throw std::runtime_error(
-          "Backend config must be provided as key-value pairs: " +
-          std::to_string(split.size()));
-
-    // Add to the backend configuration map
-    for (std::size_t i = 1; i < split.size(); i += 2) {
-      // No need to decode trivial true/false values
-      if (split[i + 1].starts_with("base64_")) {
-        split[i + 1].erase(0, 7); // erase "base64_"
-        std::vector<char> decoded_vec;
-        if (auto err = llvm::decodeBase64(split[i + 1], decoded_vec))
-          throw std::runtime_error("DecodeBase64 error");
-        std::string decodedStr(decoded_vec.data(), decoded_vec.size());
-        cudaq::info("Decoded {} parameter from '{}' to '{}'", split[i],
-                    split[i + 1], decodedStr);
-        backendConfig.insert({split[i], decodedStr});
-      } else {
-        backendConfig.insert({split[i], split[i + 1]});
-      }
-    }
-  }
-
-  /// Once we know the backend, we should search for the config file
-  /// from there we can get the URL/PORT and other inforation used in the
-  /// pipeline.
-  // Set the qpu name
-  qpuName = mutableBackend;
-  initialize();
-}
-
-/// @brief Launch the kernel.
-void OrcaRemoteRESTQPU::launchKernel(const std::string &kernelName,
-                                     void (*kernelFunc)(void *), void *args,
-                                     std::uint64_t voidStarSize,
-                                     std::uint64_t resultOffset,
-                                     const std::vector<void *> &rawArgs) {
-  cudaq::info("launching ORCA remote rest kernel ({})", kernelName);
-
-  // TODO future iterations of this should support non-void return types.
-  if (!executionContext)
-    throw std::runtime_error("Remote rest execution can only be performed "
-                             "via cudaq::sample() or cudaq::observe().");
-
-  cudaq::orca::TBIParameters params =
-      *((struct cudaq::orca::TBIParameters *)args);
-  std::size_t shots = params.n_samples;
-
-  setShots(shots);
-  executionContext->shots = shots;
-
-  cudaq::info("Executor creating job to execute with the {} helper.", name());
-
-  // Create the Job Payload, composed of job post path, headers,
-  // and the job json messages themselves
-  auto [jobPostPath, headers, jobs] = createJob(params);
-  auto job = jobs[0];
-  cudaq::info("Job (name={}) created, posting to {}", kernelName, jobPostPath);
-
-  // Post it, get the response
-  auto response = client.post(jobPostPath, "", job, headers);
-
-  cudaq::sample_result counts = processResults(response);
-
-  // // return the results synchronously
-  executionContext->result = counts;
-}
-
-// Initialize the ORCA server helper with a given backend configuration
-void OrcaRemoteRESTQPU::initialize() {
-  // Set the machine
-  auto iter = backendConfig.find("machine");
-  if (iter != backendConfig.end())
-    machine = iter->second;
-
-  // Set a base URL if provided
-  iter = backendConfig.find("url");
-  if (iter != backendConfig.end()) {
-    baseUrl = iter->second;
-  }
-}
-
-// Create a job for the ORCA QPU
-cudaq::ServerJobPayload
-OrcaRemoteRESTQPU::createJob(cudaq::orca::TBIParameters params) {
-  std::vector<cudaq::ServerMessage> jobs;
-  cudaq::ServerMessage job;
-
-  // Construct the job message
-  job["target"] = machine;
-
-  job["input_state"] = params.input_state;
-  job["loop_lengths"] = params.loop_lengths;
-  job["bs_angles"] = params.bs_angles;
-  job["ps_angles"] = params.ps_angles;
-  job["n_samples"] = params.n_samples;
-
-  jobs.push_back(job);
-
-  // Return a tuple containing the job path, headers, and the job message
-  auto ret = std::make_tuple(baseUrl, getHeaders(), jobs);
-  return ret;
-}
-
-// Process the results from a job
-cudaq::sample_result
-OrcaRemoteRESTQPU::processResults(cudaq::ServerMessage &postJobResponse) {
-  auto results = postJobResponse.at("results");
-
-  cudaq::CountsDictionary counts;
-  // Process the results
-  for (const auto &key : results) {
-    counts[key] += 1;
-  }
-
-  // Create an execution result
-  cudaq::ExecutionResult executionResult(counts);
-  // Return a sample result
-  auto ret = cudaq::sample_result(executionResult);
-  return ret;
-}
-
-// Get the headers for the API requests
-cudaq::RestHeaders OrcaRemoteRESTQPU::getHeaders() {
-  // Construct the headers
-  cudaq::RestHeaders headers;
-  headers["Authorization"] = "apiKey ";
-  headers["Content-Type"] = "application/json";
-  // Return the headers
-  return headers;
-}
-
-} // namespace
-
-CUDAQ_REGISTER_TYPE(cudaq::QPU, OrcaRemoteRESTQPU, orca)
+namespace cudaq::orca {
+
+cudaq::sample_result runSampling(TBIParameters &parameters,
+                                 std::size_t qpu_id = 0) {
+  std::size_t shots = parameters.n_samples;
+  auto ctx = std::make_unique<cudaq::ExecutionContext>("sample", shots);
+
+  auto &platform = cudaq::get_platform();
+  platform.set_exec_ctx(ctx.get(), qpu_id);
+  platform.set_current_qpu(qpu_id);
+
+  cudaq::altLaunchKernel("orca_launch", nullptr, &parameters,
+                         sizeof(TBIParameters), 0);
+
+  platform.reset_exec_ctx(qpu_id);
+  return ctx->result;
+}
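+// runAsyncSampling (below) follows the same launch path, but sets
+// ctx->asyncExec so that the QPU stores a details::future for later
+// retrieval instead of blocking on the result.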
+
+async_sample_result runAsyncSampling(TBIParameters &parameters,
+                                     std::size_t qpu_id = 0) {
+  std::size_t shots = parameters.n_samples;
+  auto ctx = std::make_unique<cudaq::ExecutionContext>("sample", shots);
+
+  // Indicate that this is an async exec
+  cudaq::details::future futureResult;
+  ctx->asyncExec = true;
+
+  auto &platform = get_platform();
+  platform.set_exec_ctx(ctx.get(), qpu_id);
+  platform.set_current_qpu(qpu_id);
+
+  cudaq::altLaunchKernel("orca_launch", nullptr, &parameters,
+                         sizeof(TBIParameters), 0);
+
+  // If we have a non-null future, set it
+  futureResult = ctx->futureResult;
+
+  platform.reset_exec_ctx(qpu_id);
+  return async_sample_result(std::move(futureResult));
+}
+
+cudaq::sample_result sample(std::vector<std::size_t> &input_state,
+                            std::vector<std::size_t> &loop_lengths,
+                            std::vector<double> &bs_angles,
+                            std::vector<double> &ps_angles, int n_samples,
+                            std::size_t qpu_id) {
+  TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles,
+                           n_samples};
+  return runSampling(parameters, qpu_id);
+}
+
+cudaq::sample_result sample(std::vector<std::size_t> &input_state,
+                            std::vector<std::size_t> &loop_lengths,
+                            std::vector<double> &bs_angles, int n_samples,
+                            std::size_t qpu_id) {
+  std::vector<double> ps_angles = {};
+  TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles,
+                           n_samples};
+  return runSampling(parameters, qpu_id);
+}
+
+async_sample_result sample_async(std::vector<std::size_t> &input_state,
+                                 std::vector<std::size_t> &loop_lengths,
+                                 std::vector<double> &bs_angles,
+                                 std::vector<double> &ps_angles, int n_samples,
+                                 std::size_t qpu_id) {
+  TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles,
+                           n_samples};
+  return runAsyncSampling(parameters, qpu_id);
+}
+
+async_sample_result sample_async(std::vector<std::size_t> &input_state,
+                                 std::vector<std::size_t> &loop_lengths,
+                                 std::vector<double> &bs_angles, int n_samples,
+                                 std::size_t qpu_id) {
+  std::vector<double> ps_angles = {};
+  TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles,
+                           n_samples};
+  return runAsyncSampling(parameters, qpu_id);
+}
+
+} // namespace cudaq::orca
diff --git a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp
new file mode 100644
index 0000000000..03ce8c9daf
--- /dev/null
+++ b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp
@@ -0,0 +1,93 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.   *
+ ******************************************************************************/
+
+#include "OrcaRemoteRESTQPU.h"
+#include "common/Logger.h"
+#include "llvm/Support/Base64.h"
+
+namespace cudaq {
+/// @brief This setTargetBackend override is in charge of reading the
+/// specific target backend configuration file.
+void OrcaRemoteRESTQPU::setTargetBackend(const std::string &backend) {
+  cudaq::info("Remote REST platform is targeting {}.", backend);
+
+  // First we see if the given backend has extra config params
+  auto mutableBackend = backend;
+  if (mutableBackend.find(";") != std::string::npos) {
+    auto split = cudaq::split(mutableBackend, ';');
+    mutableBackend = split[0];
+    // Must be key-value pairs, therefore an even number of values here
+    if ((split.size() - 1) % 2 != 0)
+      throw std::runtime_error(
+          "Backend config must be provided as key-value pairs: " +
+          std::to_string(split.size()));
+
+    // Add to the backend configuration map
+    for (std::size_t i = 1; i < split.size(); i += 2) {
+      // No need to decode trivial true/false values
+      if (split[i + 1].starts_with("base64_")) {
+        split[i + 1].erase(0, 7); // erase "base64_"
+        std::vector<char> decoded_vec;
+        if (auto err = llvm::decodeBase64(split[i + 1], decoded_vec))
+          throw std::runtime_error("DecodeBase64 error");
+        std::string decodedStr(decoded_vec.data(), decoded_vec.size());
+        cudaq::info("Decoded {} parameter from '{}' to '{}'", split[i],
+                    split[i + 1], decodedStr);
+        backendConfig.insert({split[i], decodedStr});
+      } else {
+        backendConfig.insert({split[i], split[i + 1]});
+      }
+    }
+  }
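+
+  // For illustration, a hypothetical backend string this parser accepts --
+  // key-value pairs follow the backend name, separated by ';':
+  //   "orca;url;http://localhost:8080;machine;PT-1"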
+
+  /// Once we know the backend, we should search for the config file;
+  /// from there we can get the URL/PORT and other information used in the
+  /// pipeline.
+  // Set the qpu name
+  qpuName = mutableBackend;
+  serverHelper = registry::get<ServerHelper>(qpuName);
+  serverHelper->initialize(backendConfig);
+
+  // Give the server helper to the executor
+  executor->setServerHelper(serverHelper.get());
+}
+
+/// @brief Launch the experiment.
+void OrcaRemoteRESTQPU::launchKernel(const std::string &kernelName,
+                                     void (*kernelFunc)(void *), void *args,
+                                     std::uint64_t voidStarSize,
+                                     std::uint64_t resultOffset,
+                                     const std::vector<void *> &rawArgs) {
+  cudaq::info("launching ORCA remote rest experiment ({})", kernelName);
+
+  // TODO future iterations of this should support non-void return types.
+  if (!executionContext)
+    throw std::runtime_error("Remote rest execution can only be performed "
+                             "via cudaq::sample() or cudaq::observe().");
+
+  cudaq::orca::TBIParameters params =
+      *((struct cudaq::orca::TBIParameters *)args);
+  std::size_t shots = params.n_samples;
+
+  executionContext->shots = shots;
+
+  cudaq::details::future future;
+  future = executor->execute(params, kernelName);
+
+  // Keep this asynchronous if requested
+  if (executionContext->asyncExec) {
+    executionContext->futureResult = future;
+    return;
+  }
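+  // (The stored futureResult above is what runAsyncSampling in OrcaQPU.cpp
+  // wraps into the user-facing async_sample_result.)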
+
+  // Otherwise make this synchronous
+  executionContext->result = future.get();
+}
+
+} // namespace cudaq
+CUDAQ_REGISTER_TYPE(cudaq::QPU, cudaq::OrcaRemoteRESTQPU, orca)
\ No newline at end of file
diff --git a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h
new file mode 100644
index 0000000000..3e14194d23
--- /dev/null
+++ b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h
@@ -0,0 +1,124 @@
+/****************************************************************-*- C++ -*-****
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.   *
+ ******************************************************************************/
+
+#pragma once
+
+#include "OrcaExecutor.h"
+#include "common/ExecutionContext.h"
+#include "common/Future.h"
+#include "common/RestClient.h"
+#include "common/ServerHelper.h"
+#include "cudaq/platform/qpu.h"
+#include "orca_qpu.h"
+
+namespace cudaq {
+
+/// @brief The OrcaRemoteRESTQPU is a subtype of QPU that enables the
+/// execution of CUDA-Q kernels on remotely hosted quantum computing
+/// services via a REST Client / Server interaction. This type is meant
+/// to be general enough to support any remotely hosted service.
+/// Moreover, this QPU handles launching kernels under the Execution Context
+/// that includes sampling via synchronous client invocations.
+class OrcaRemoteRESTQPU : public cudaq::QPU {
+protected:
+  /// The number of shots
+  std::optional<int> nShots;
+
+  /// @brief the platform file path, CUDAQ_INSTALL/platforms
+  std::filesystem::path platformPath;
+
+  /// @brief The name of the QPU being targeted
+  std::string qpuName;
+
+  /// @brief Flag indicating whether we should emulate
+  /// execution locally.
+  bool emulate = false;
+
+  // Pointer to the concrete Executor for this QPU
+  std::unique_ptr<OrcaExecutor> executor;
+
+  /// @brief Pointer to the concrete ServerHelper, provides
+  /// specific JSON payloads and POST/GET URL paths.
+  std::unique_ptr<ServerHelper> serverHelper;
+
+  /// @brief Mapping of general key-values for backend
+  /// configuration.
+  std::map<std::string, std::string> backendConfig;
+
+private:
+  /// @brief RestClient used for HTTP requests.
+  RestClient client;
+
+public:
+  /// @brief The constructor
+  OrcaRemoteRESTQPU() : QPU() {
+    std::filesystem::path cudaqLibPath{getCUDAQLibraryPath()};
+    platformPath = cudaqLibPath.parent_path().parent_path() / "targets";
+    // Default is to run sampling via the remote rest call
+    executor = std::make_unique<OrcaExecutor>();
+  }
+
+  OrcaRemoteRESTQPU(OrcaRemoteRESTQPU &&) = delete;
+
+  /// @brief The destructor
+  virtual ~OrcaRemoteRESTQPU() = default;
+
+  /// Enqueue a quantum task on the asynchronous execution queue.
+  void enqueue(cudaq::QuantumTask &task) override {
+    execution_queue->enqueue(task);
+  }
+
+  /// @brief Return true if the current backend is a simulator
+  bool isSimulator() override { return emulate; }
+
+  /// @brief Return true if the current backend supports conditional feedback
+  bool supportsConditionalFeedback() override { return false; }
+
+  /// Provide the number of shots
+  void setShots(int _nShots) override { nShots = _nShots; }
+
+  /// Clear the number of shots
+  void clearShots() override { nShots = std::nullopt; }
+
+  /// @brief Return true if the current backend is remote
+  virtual bool isRemote() override { return !emulate; }
+
+  /// Store the execution context for launchKernel
+  void setExecutionContext(cudaq::ExecutionContext *context) override {
+    if (!context)
+      return;
+
+    cudaq::info("Remote Rest QPU setting execution context to {}",
+                context->name);
+
+    // Execution context is valid
+    executionContext = context;
+  }
+
+  /// Reset the execution context
+  void resetExecutionContext() override {
+    // do nothing here
+    executionContext = nullptr;
+  }
+
+  /// @brief This setTargetBackend override is in charge of reading the
+  /// specific target backend configuration file.
+  void setTargetBackend(const std::string &backend) override;
+
+  /// @brief Launch the kernel. Handle all pertinent modifications for the
+  /// execution context.
+  void launchKernel(const std::string &kernelName, void (*kernelFunc)(void *),
+                    void *args, std::uint64_t voidStarSize,
+                    std::uint64_t resultOffset,
+                    const std::vector<void *> &rawArgs) override;
+  void launchKernel(const std::string &kernelName,
+                    const std::vector<void *> &rawArgs) override {
+    throw std::runtime_error("launch kernel on raw args not implemented");
+  }
+};
+} // namespace cudaq
diff --git a/runtime/cudaq/platform/orca/OrcaServerHelper.cpp b/runtime/cudaq/platform/orca/OrcaServerHelper.cpp
new file mode 100644
index 0000000000..87adff66b9
--- /dev/null
+++ b/runtime/cudaq/platform/orca/OrcaServerHelper.cpp
@@ -0,0 +1,134 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.   *
+ ******************************************************************************/
+
+#include "OrcaServerHelper.h"
+#include "common/Future.h"
+#include "common/Logger.h"
+#include "common/Registry.h"
+#include "orca_qpu.h"
+
+namespace cudaq {
+
+// Initialize the ORCA server helper with a given backend configuration
+void OrcaServerHelper::initialize(BackendConfig config) {
+  backendConfig = config;
+
+  // Set the machine
+  auto iter = backendConfig.find("machine");
+  if (iter != backendConfig.end())
+    machine = iter->second;
+
+  // Set an alternate base URL if provided
+  iter = backendConfig.find("url");
+  if (iter != backendConfig.end()) {
+    baseUrl = iter->second;
+    if (!baseUrl.ends_with("/"))
+      baseUrl += "/";
+  }
+}
+
+// Create a job for the ORCA QPU
+ServerJobPayload
+OrcaServerHelper::createJob(cudaq::orca::TBIParameters params) {
+  std::vector<ServerMessage> jobs;
+  ServerMessage job;
+
+  // Construct the job message
+  job["target"] = machine;
+
+  job["input_state"] = params.input_state;
+  job["loop_lengths"] = params.loop_lengths;
+  job["bs_angles"] = params.bs_angles;
+  job["ps_angles"] = params.ps_angles;
+  job["n_samples"] = params.n_samples;
+
+  jobs.push_back(job);
+
+  // Return a tuple containing the job path, headers, and the job message
+  return std::make_tuple(baseUrl + "v1/submit", getHeaders(), jobs);
+}
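+
+// For reference, a job message constructed above has the following shape
+// (values illustrative) and is POSTed to <baseUrl>v1/submit:
+//   {"target": "PT-1", "input_state": [1, 0, 1, 0], "loop_lengths": [1],
+//    "bs_angles": [0.96, ...], "ps_angles": [], "n_samples": 10000}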
+
+// Process the results from a job
+sample_result OrcaServerHelper::processResults(ServerMessage &postJobResponse,
+                                               std::string &jobID) {
+  auto results = postJobResponse.at("results");
+
+  CountsDictionary counts;
+  // Process the results
+  for (const auto &key : results) {
+    counts[key] += 1;
+  }
+
+  // Create an execution result
+  ExecutionResult executionResult(counts);
+  // Return a sample result
+  auto ret = sample_result(executionResult);
+  return ret;
+}
+
+std::map<std::string, std::string>
+OrcaServerHelper::generateRequestHeader() const {
+  std::string token, refreshKey, timeStr;
+  if (auto auth_token = std::getenv("ORCA_AUTH_TOKEN"))
+    token = "Bearer " + std::string(auth_token);
+  else
+    token = "Bearer ";
+
+  std::map<std::string, std::string> headers{
+      {"Authorization", token},
+      {"Content-Type", "application/json"},
+      {"Connection", "keep-alive"},
+      {"Accept", "*/*"}};
+  return headers;
+}
+
+// Get the headers for the API requests
+RestHeaders OrcaServerHelper::getHeaders() { return generateRequestHeader(); }
+
+// From a server message, extract the job ID
+std::string OrcaServerHelper::extractJobId(ServerMessage &postResponse) {
+  // If the response does not contain the key 'job_id', throw an exception
+  if (!postResponse.contains("job_id"))
+    throw std::runtime_error("ServerMessage doesn't contain 'job_id' key.");
+
+  // Return the job ID from the response
+  auto ret = postResponse.at("job_id");
+  return ret;
+}
+
+std::string
+OrcaServerHelper::constructGetJobPath(ServerMessage &postResponse) {
+  return baseUrl + "v1/get_job/" + extractJobId(postResponse);
+}
+
+std::string OrcaServerHelper::constructGetJobPath(std::string &jobId) {
+  return baseUrl + "v1/get_job/" + jobId;
+}
+
+bool OrcaServerHelper::jobIsDone(ServerMessage &getJobResponse) {
+  auto error = getJobResponse["error_message"].is_null();
+  auto status = getJobResponse["job_status"].is_null();
+  if (error && status) {
+    return true;
+  } else if (!status) {
+    auto job_status = getJobResponse["job_status"].get<std::string>();
+    cudaq::info("job_status {}", job_status);
+    return false;
+  } else {
+    auto error_message = getJobResponse["error_message"].get<std::string>();
+    cudaq::info("error_message {}", error_message);
+    if (error_message == "Job can't be found") {
+      return false;
+    } else {
+      throw std::runtime_error(error_message);
+    }
+  }
+}
+
+} // namespace cudaq
+
+CUDAQ_REGISTER_TYPE(cudaq::ServerHelper, cudaq::OrcaServerHelper, orca)
diff --git a/runtime/cudaq/platform/orca/OrcaServerHelper.h b/runtime/cudaq/platform/orca/OrcaServerHelper.h
new file mode 100644
index 0000000000..c3c5837ccb
--- /dev/null
+++ b/runtime/cudaq/platform/orca/OrcaServerHelper.h
@@ -0,0 +1,91 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.   *
+ ******************************************************************************/
+#pragma once
+
+#include "common/Registry.h"
+#include "common/ServerHelper.h"
+#include "cudaq/utils/cudaq_utils.h"
+#include "orca_qpu.h"
+
+#include "nlohmann/json.hpp"
+
+namespace cudaq {
+
+class OrcaServerHelper : public ServerHelper {
+
+protected:
+  /// @brief The base URL
+  std::string baseUrl = "http://localhost:8080/";
+
+  /// @brief The machine we are targeting
+  std::string machine = "PT-1";
+
+  /// @brief Time string, when the last tokens were retrieved
+  std::string timeStr = "";
+
+  /// @brief The refresh token
+  std::string refreshKey = "";
+
+  /// @brief ORCA requires the API token be updated every so often,
+  /// using the provided refresh token. This function will do that.
+  void refreshTokens(bool force_refresh = false);
+
+  /// @brief Return the headers required for the REST calls
+  RestHeaders generateRequestHeader() const;
+
+public:
+  OrcaServerHelper() = default;
+  virtual ~OrcaServerHelper() = default;
+
+  /// @brief Return the name of this server helper, must be the
+  /// same as the QPU configuration file.
+  const std::string name() const override { return "orca"; }
+
+  /// @brief Return the POST/GET required headers.
+  RestHeaders getHeaders() override;
+
+  /// @brief Set the server configuration.
+  void initialize(BackendConfig config) override;
+
+  /// @brief Create a job payload for the provided TBI parameters
+  ServerJobPayload createJob(cudaq::orca::TBIParameters params);
+
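+  // Note: ORCA jobs are built from TBIParameters via the overload above;
+  // the base-interface overload below is required by ServerHelper but
+  // returns an empty payload.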
+  /// @brief Create a job payload for the provided quantum codes
+  ServerJobPayload
+  createJob(std::vector<KernelExecution> &circuitCodes) override {
+    std::vector<ServerMessage> jobs;
+    ServerMessage job;
+    jobs.push_back(job);
+
+    std::map<std::string, std::string> headers;
+
+    // Return a tuple containing the job path, headers, and the job message
+    auto ret = std::make_tuple("", headers, jobs);
+    return ret;
+  };
+
+  /// @brief Return the job id from the previous job post
+  std::string extractJobId(ServerMessage &postResponse) override;
+
+  /// @brief Return the URL for retrieving job results
+  std::string constructGetJobPath(ServerMessage &postResponse) override;
+  std::string constructGetJobPath(std::string &jobId) override;
+
+  /// @brief Return true if the job is done
+  bool jobIsDone(ServerMessage &getJobResponse) override;
+
+  // /// @brief Given a completed job response, map back to the sample_result
+  // sample_result processResults(ServerMessage &postJobResponse);
+
+  /// @brief Given a completed job response, map back to the sample_result
+  sample_result processResults(ServerMessage &postJobResponse,
+                               std::string &jobID) override;
+};
+
+} // namespace cudaq
diff --git a/runtime/cudaq/platform/orca/orca.yml b/runtime/cudaq/platform/orca/orca.yml
index 6367600bd3..d55951f643 100644
--- a/runtime/cudaq/platform/orca/orca.yml
+++ b/runtime/cudaq/platform/orca/orca.yml
@@ -22,7 +22,7 @@ target-arguments:
   - key: url
     required: false
    type: string
-    platform-arg: url 
+    platform-arg: url
     help-string: "Specify URL."
   - key: machine
     required: false
diff --git a/runtime/cudaq/platform/orca/orca_qpu.h b/runtime/cudaq/platform/orca/orca_qpu.h
index fe95f6aeb0..643a1faf91 100644
--- a/runtime/cudaq/platform/orca/orca_qpu.h
+++ b/runtime/cudaq/platform/orca/orca_qpu.h
@@ -5,11 +5,13 @@
  * This source code and the accompanying materials are made available under   *
  * the terms of the Apache License 2.0 which accompanies this distribution.   *
  ******************************************************************************/
-
 #pragma once
 
-#include "cudaq.h"
+#include "common/ExecutionContext.h"
+#include "common/Future.h"
+#include "common/MeasureCounts.h"
 #include "cudaq/platform/quantum_platform.h"
+
 #include <functional>
 #include <vector>
 
@@ -25,14 +27,30 @@ struct TBIParameters {
   int n_samples;
 };
 
+/// @brief Return type for asynchronous sampling.
+using async_sample_result = cudaq::async_result<cudaq::sample_result>;
+
 /// @brief Implementation of the sample method of the cudaq::orca namespace
 cudaq::sample_result sample(std::vector<std::size_t> &input_state,
                             std::vector<std::size_t> &loop_lengths,
                             std::vector<double> &bs_angles,
                             std::vector<double> &ps_angles,
-                            int n_samples = 10000);
+                            int n_samples = 10000, std::size_t qpu_id = 0);
+
 cudaq::sample_result sample(std::vector<std::size_t> &input_state,
                             std::vector<std::size_t> &loop_lengths,
                             std::vector<double> &bs_angles,
-                            int n_samples = 10000);
+                            int n_samples = 10000, std::size_t qpu_id = 0);
+
+async_sample_result sample_async(std::vector<std::size_t> &input_state,
+                                 std::vector<std::size_t> &loop_lengths,
+                                 std::vector<double> &bs_angles,
+                                 std::vector<double> &ps_angles,
+                                 int n_samples = 10000,
+                                 std::size_t qpu_id = 0);
+
+async_sample_result sample_async(std::vector<std::size_t> &input_state,
+                                 std::vector<std::size_t> &loop_lengths,
+                                 std::vector<double> &bs_angles,
+                                 int n_samples = 10000,
+                                 std::size_t qpu_id = 0);
+
 }; // namespace cudaq::orca
\ No newline at end of file