diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index b0f75b25975..a49e196cb52 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -383,6 +383,7 @@ ConfigureNVBench(
   string/join_strings.cpp
   string/lengths.cpp
   string/like.cpp
+  string/make_strings_column.cu
   string/replace_re.cpp
   string/reverse.cpp
   string/slice.cpp
diff --git a/cpp/benchmarks/string/make_strings_column.cu b/cpp/benchmarks/string/make_strings_column.cu
new file mode 100644
index 00000000000..10c7eb65bdd
--- /dev/null
+++ b/cpp/benchmarks/string/make_strings_column.cu
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/pair.h>
+#include <thrust/tabulate.h>
+
+#include <nvbench/nvbench.cuh>
+
+#include <vector>
+
+constexpr int min_row_width = 0;
+constexpr int max_row_width = 50;
+
+static void BM_make_strings_column(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const has_nulls = static_cast<bool>(state.get_int64("has_nulls"));
+
+  data_profile const table_profile =
+    data_profile_builder()
+      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_row_width, max_row_width)
+      .null_probability(has_nulls ? std::optional<double>{0.1} : std::nullopt);
+  auto const data_table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile);
+
+  using string_index_pair = thrust::pair<char const*, cudf::size_type>;
+  auto const stream       = cudf::get_default_stream();
+  auto input              = rmm::device_uvector<string_index_pair>(data_table->num_rows(), stream);
+  auto const d_data_ptr =
+    cudf::column_device_view::create(data_table->get_column(0).view(), stream);
+  thrust::tabulate(rmm::exec_policy(stream),
+                   input.begin(),
+                   input.end(),
+                   [data_col = *d_data_ptr] __device__(auto const idx) {
+                     if (data_col.is_null(idx)) { return string_index_pair{nullptr, 0}; }
+                     auto const row = data_col.element<cudf::string_view>(idx);
+                     return string_index_pair{row.data(), row.size_bytes()};
+                   });
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    [[maybe_unused]] auto const output = cudf::make_strings_column(input, stream);
+  });
+}
+
+static void BM_make_strings_column_batch(nvbench::state& state)
+{
+  auto const num_rows   = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const has_nulls  = static_cast<bool>(state.get_int64("has_nulls"));
+  auto const batch_size = static_cast<cudf::size_type>(state.get_int64("batch_size"));
+
+  data_profile const table_profile =
+    data_profile_builder()
+      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_row_width, max_row_width)
+      .null_probability(has_nulls ? std::optional<double>{0.1} : std::nullopt);
+  auto const data_table = create_random_table(
+    cycle_dtypes({cudf::type_id::STRING}, batch_size), row_count{num_rows}, table_profile);
+
+  using string_index_pair = thrust::pair<char const*, cudf::size_type>;
+  auto const stream       = cudf::get_default_stream();
+  auto input_data         = std::vector<rmm::device_uvector<string_index_pair>>{};
+  auto input              = std::vector<cudf::device_span<string_index_pair const>>{};
+  input_data.reserve(batch_size);
+  input.reserve(batch_size);
+  for (auto i = 0; i < batch_size; ++i) {
+    auto const d_data_ptr =
+      cudf::column_device_view::create(data_table->get_column(i).view(), stream);
+    auto batch_input = rmm::device_uvector<string_index_pair>(data_table->num_rows(), stream);
+    thrust::tabulate(rmm::exec_policy(stream),
+                     batch_input.begin(),
+                     batch_input.end(),
+                     [data_col = *d_data_ptr] __device__(auto const idx) {
+                       if (data_col.is_null(idx)) { return string_index_pair{nullptr, 0}; }
+                       auto const row = data_col.element<cudf::string_view>(idx);
+                       return string_index_pair{row.data(), row.size_bytes()};
+                     });
+    input_data.emplace_back(std::move(batch_input));
+    input.emplace_back(input_data.back());
+  }
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    [[maybe_unused]] auto const output = cudf::make_strings_column_batch(input, stream);
+  });
+}
+
+NVBENCH_BENCH(BM_make_strings_column)
+  .set_name("make_strings_column")
+  .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000, 200'000'000})
+  .add_int64_axis("has_nulls", {0, 1});
+
+NVBENCH_BENCH(BM_make_strings_column_batch)
+  .set_name("make_strings_column_batch")
+  .add_int64_axis("num_rows", {1'000'000, 10'000'000, 20'000'000})
+  .add_int64_axis("has_nulls", {0, 1})
+  .add_int64_axis("batch_size", {10, 50, 100, 200});
diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index c3b68b52c36..723768bb068 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -378,6 +378,25 @@ std::unique_ptr<column> make_strings_column(
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
+/**
+ * @brief Construct a batch of STRING type columns given an array of device span of pointer/size
+ *
+ * This function has input/output expectation similar to the `make_strings_column()` API that
+ * accepts only one device span of pointer/size pairs. The difference is that, this is designed to
+ * create many strings column at once with minimal overhead of multiple kernel launches and
+ * stream synchronizations.
+ *
+ * @param input Array of device spans of pointer/size pairs, where each pointer is a device memory
+ *        address or `nullptr` (indicating a null string), and size is string length (in bytes)
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used for memory allocation of the output columns
+ * @return Array of constructed strings columns
+ */
+std::vector<std::unique_ptr<column>> make_strings_column_batch(
+  std::vector<cudf::device_span<thrust::pair<char const*, size_type> const>> const& input,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
 /**
  * @brief Construct a STRING type column given a device span of string_view.
  *
diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh
index cfb2e70bfed..0dca0cce85b 100644
--- a/cpp/include/cudf/detail/valid_if.cuh
+++ b/cpp/include/cudf/detail/valid_if.cuh
@@ -18,6 +18,7 @@
 
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/bit.hpp>
 #include <cudf/utilities/default_stream.hpp>
@@ -188,5 +189,72 @@ CUDF_KERNEL void valid_if_n_kernel(InputIterator1 begin1,
   }
 }
 
+// TODO
+/**
+ * @brief Populates a set of bitmasks by applying a binary predicate to two
+*         input ranges.
+
+ * Given a set of bitmasks, `masks`, the state of bit `j` in mask `i` is
+ * determined by `p( *(begin1 + i), *(begin2 + j))`. If the predicate evaluates
+ * to true, the bit is set to `1`. If false, set to `0`.
+ *
+ * Example Arguments:
+ * begin1:        zero-based counting iterator,
+ * begin2:        zero-based counting iterator,
+ * p:             [](size_type col, size_type row){ return col == row; }
+ * masks:         [[b00...], [b00...], [b00...]]
+ * mask_count:    3
+ * mask_num_bits: 2
+ * valid_counts:  [0, 0, 0]
+ *
+ * Example Results:
+ * masks:         [[b10...], [b01...], [b00...]]
+ * valid_counts:  [1, 1, 0]
+ *
+ * @note If any mask in `masks` is `nullptr`, that mask will be ignored.
+ *
+ * @param begin1        LHS arguments to binary predicate. ex: column/mask idx
+ * @param begin2        RHS arguments to binary predicate. ex: row/bit idx
+ * @param p             Predicate: `bit = p(begin1 + mask_idx, begin2 + bit_idx)`
+ * @param masks         Masks for which bits will be obtained and assigned.
+ * @param mask_count    The number of `masks`.
+ * @param mask_num_bits The number of bits to assign for each mask. If this
+ *                      number is smaller than the total number of bits, the
+ *                      remaining bits may not be initialized.
+ * @param valid_counts  Used to obtain the total number of valid bits for each
+ *                      mask.
+ */
+template <int32_t block_size, typename ElementType, typename BinaryPredicate>
+CUDF_KERNEL void valid_if_batch_kernel(device_span<device_span<ElementType const> const> input,
+                                       BinaryPredicate p,
+                                       bitmask_type* const* masks,
+                                       size_type* valid_counts)
+{
+  for (std::size_t mask_idx = 0; mask_idx < input.size(); ++mask_idx) {
+    auto const mask_input    = input[mask_idx];
+    auto const mask_num_bits = mask_input.size();
+    auto const out_mask      = masks[mask_idx];
+
+    std::size_t block_offset{blockIdx.x * blockDim.x};
+    size_type warp_valid_count{0};
+    while (block_offset < mask_num_bits) {
+      auto const thread_idx    = block_offset + threadIdx.x;
+      auto const thread_active = thread_idx < mask_num_bits;
+      auto const bit_is_valid  = thread_active && p(mask_input[thread_idx]);
+      auto const warp_validity = __ballot_sync(0xffff'ffffu, bit_is_valid);
+
+      if (thread_active && threadIdx.x % warp_size == 0) {
+        out_mask[word_index(thread_idx)] = warp_validity;
+      }
+
+      warp_valid_count += __popc(warp_validity);
+      block_offset += blockDim.x * gridDim.x;
+    }
+
+    auto const block_valid_count = single_lane_block_sum_reduce<block_size, 0>(warp_valid_count);
+    if (threadIdx.x == 0) { atomicAdd(valid_counts + mask_idx, block_valid_count); }
+  }
+}
+
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh
index 6b1b453a752..f18cf78578e 100644
--- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh
+++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh
@@ -59,6 +59,61 @@ using string_index_pair = thrust::pair<char const*, size_type>;
  */
 constexpr size_type FACTORY_BYTES_PER_ROW_THRESHOLD = 64;
 
+/**
+ * @brief Gather characters to create a strings column using the given string_index_pair iterator
+ *
+ * @tparam IndexPairIterator iterator over type `pair<char const*,size_type>` values
+ *
+ * @param offsets The offsets for the output strings column
+ * @param chars_size The size (in bytes) of the chars data
+ * @param avg_bytes_per_row The average bytes per row
+ * @param begin Iterator to the first string_index_pair
+ * @param strings_count The number of strings
+ * @param stream CUDA stream used for device memory operations
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return An array of chars gathered from the input string_index_pair iterator
+ */
+template <typename IndexPairIterator>
+rmm::device_uvector<char> make_chars_column(column_view const& offsets,
+                                            int64_t chars_size,
+                                            int64_t avg_bytes_per_row,
+                                            IndexPairIterator begin,
+                                            size_type strings_count,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::device_async_resource_ref mr)
+{
+  auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets);
+
+  // use a character-parallel kernel for long string lengths
+  if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) {
+    auto const str_begin = thrust::make_transform_iterator(
+      begin, cuda::proclaim_return_type<string_view>([] __device__(auto ip) {
+        return string_view{ip.first, ip.second};
+      }));
+
+    return gather_chars(str_begin,
+                        thrust::make_counting_iterator<size_type>(0),
+                        thrust::make_counting_iterator<size_type>(strings_count),
+                        d_offsets,
+                        chars_size,
+                        stream,
+                        mr);
+  }
+  // this approach is 2-3x faster for a large number of smaller string lengths
+  auto chars_data = rmm::device_uvector<char>(chars_size, stream, mr);
+  auto d_chars    = chars_data.data();
+  auto copy_chars = [d_chars] __device__(auto item) {
+    string_index_pair const str = thrust::get<0>(item);
+    int64_t const offset        = thrust::get<1>(item);
+    if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
+  };
+  thrust::for_each_n(rmm::exec_policy_nosync(stream),
+                     thrust::make_zip_iterator(thrust::make_tuple(begin, d_offsets)),
+                     strings_count,
+                     copy_chars);
+  return chars_data;
+}
+
 /**
  * @brief Create a strings-type column from iterators of pointer/size pairs
  *
@@ -88,8 +143,6 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,
   auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer);
   auto [offsets_column, bytes] = cudf::strings::detail::make_offsets_child_column(
     offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
-  auto const d_offsets =
-    cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view());
 
   // create null mask
   auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; };
@@ -99,38 +152,9 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,
     (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr};
 
   // build chars column
-  auto chars_data = [d_offsets, bytes = bytes, begin, strings_count, null_count, stream, mr] {
-    auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1);
-    // use a character-parallel kernel for long string lengths
-    if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) {
-      auto const str_begin = thrust::make_transform_iterator(
-        begin, cuda::proclaim_return_type<string_view>([] __device__(auto ip) {
-          return string_view{ip.first, ip.second};
-        }));
-
-      return gather_chars(str_begin,
-                          thrust::make_counting_iterator<size_type>(0),
-                          thrust::make_counting_iterator<size_type>(strings_count),
-                          d_offsets,
-                          bytes,
-                          stream,
-                          mr);
-    } else {
-      // this approach is 2-3x faster for a large number of smaller string lengths
-      auto chars_data = rmm::device_uvector<char>(bytes, stream, mr);
-      auto d_chars    = chars_data.data();
-      auto copy_chars = [d_chars] __device__(auto item) {
-        string_index_pair const str = thrust::get<0>(item);
-        int64_t const offset        = thrust::get<1>(item);
-        if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
-      };
-      thrust::for_each_n(rmm::exec_policy(stream),
-                         thrust::make_zip_iterator(thrust::make_tuple(begin, d_offsets)),
-                         strings_count,
-                         copy_chars);
-      return chars_data;
-    }
-  }();
+  auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1);
+  auto chars_data              = make_chars_column(
+    offsets_column->view(), bytes, avg_bytes_per_row, begin, strings_count, stream, mr);
 
   return make_strings_column(strings_count,
                              std::move(offsets_column),
diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu
index 07516f91dcf..972e689f151 100644
--- a/cpp/src/strings/strings_column_factories.cu
+++ b/cpp/src/strings/strings_column_factories.cu
@@ -17,35 +17,177 @@
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/detail/valid_if.cuh>
 #include <cudf/strings/detail/strings_column_factories.cuh>
-#include <cudf/strings/detail/utilities.cuh>
-#include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
-#include <cudf/utilities/memory_resource.hpp>
 #include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/pair.h>
-
 namespace cudf {
 
+namespace strings::detail {
+
 namespace {
-struct string_view_to_pair {
-  string_view null_placeholder;
-  string_view_to_pair(string_view n) : null_placeholder(n) {}
-  __device__ thrust::pair<char const*, size_type> operator()(string_view const& i)
-  {
-    return (i.data() == null_placeholder.data())
-             ? thrust::pair<char const*, size_type>{nullptr, 0}
-             : thrust::pair<char const*, size_type>{i.data(), i.size_bytes()};
+
+using column_string_pairs = cudf::device_span<thrust::pair<char const*, size_type> const>;
+
+template <typename OutputType>
+std::pair<std::vector<std::unique_ptr<column>>, rmm::device_uvector<int64_t>>
+make_offsets_child_column_batch_async(std::vector<column_string_pairs> const& input,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::device_async_resource_ref mr)
+{
+  std::vector<std::unique_ptr<column>> offsets_columns(input.size());
+  rmm::device_uvector<int64_t> chars_sizes(input.size(), stream);
+  for (std::size_t idx = 0; idx < input.size(); ++idx) {
+    auto const string_pairs = input[idx];
+    auto const string_count = static_cast<size_type>(string_pairs.size());
+    auto offsets            = make_numeric_column(
+      data_type{type_to_id<OutputType>()}, string_count + 1, mask_state::UNALLOCATED, stream, mr);
+
+    auto const offsets_transformer = cuda::proclaim_return_type<size_type>(
+      [string_count, string_pairs = string_pairs.data()] __device__(size_type idx) -> size_type {
+        return idx < string_count ? string_pairs[idx].second : size_type{0};
+      });
+    auto const input_it  = cudf::detail::make_counting_transform_iterator(0, offsets_transformer);
+    auto const d_offsets = offsets->mutable_view().template data<OutputType>();
+    auto const output_it = cudf::detail::make_sizes_to_offsets_iterator(
+      d_offsets, d_offsets + string_count + 1, chars_sizes.data() + idx);
+    thrust::exclusive_scan(rmm::exec_policy_nosync(stream),
+                           input_it,
+                           input_it + string_count + 1,
+                           output_it,
+                           int64_t{0});
+    offsets_columns[idx] = std::move(offsets);
   }
-};
+
+  return {std::move(offsets_columns), std::move(chars_sizes)};
+}
+
+std::pair<std::vector<rmm::device_buffer>, rmm::device_uvector<size_type>> valid_if_batch_async(
+  std::vector<column_string_pairs> const& input,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr)
+{
+  auto const d_input =
+    cudf::detail::make_device_uvector_async(input, stream, cudf::get_current_device_resource_ref());
+  auto const predicate = [] __device__(thrust::pair<char const*, size_type> pair) -> bool {
+    return pair.first != nullptr;
+  };
+
+  std::vector<rmm::device_buffer> null_masks(input.size());
+  for (std::size_t idx = 0; idx < input.size(); ++idx) {
+    null_masks[idx] = cudf::create_null_mask(
+      static_cast<size_type>(input[idx].size()), mask_state::UNINITIALIZED, stream, mr);
+  }
+  std::vector<bitmask_type*> h_masks(input.size());
+  std::transform(null_masks.begin(), null_masks.end(), h_masks.begin(), [](auto& mask) {
+    return reinterpret_cast<bitmask_type*>(mask.data());
+  });
+  auto const d_masks = cudf::detail::make_device_uvector_async(
+    h_masks, stream, cudf::get_current_device_resource_ref());
+
+  rmm::device_uvector<size_type> valid_counts(input.size(), stream, mr);
+  thrust::uninitialized_fill(
+    rmm::exec_policy_nosync(stream), valid_counts.begin(), valid_counts.end(), 0);
+
+  constexpr size_type block_size{256};
+  auto const grid = cudf::detail::grid_1d{static_cast<int64_t>(input.size()), block_size};
+  cudf::detail::valid_if_batch_kernel<block_size>
+    <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+      device_span<column_string_pairs const>{d_input.data(), d_input.size()},
+      // d_input.data(),
+      predicate,
+      d_masks.data(),
+      valid_counts.data());
+
+  return {std::move(null_masks), std::move(valid_counts)};
+}
 
 }  // namespace
 
+std::vector<std::unique_ptr<column>> make_strings_column_batch(
+  std::vector<column_string_pairs> const& input,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr)
+{
+  auto [offsets_cols, d_chars_sizes] =
+    make_offsets_child_column_batch_async<size_type>(input, stream, mr);
+  auto [null_masks, d_valid_counts] = valid_if_batch_async(input, stream, mr);
+
+  auto const chars_sizes  = cudf::detail::make_host_vector_async(d_chars_sizes, stream);
+  auto const valid_counts = cudf::detail::make_host_vector_async(d_valid_counts, stream);
+
+  // This should be the only stream sync in the entire API.
+  stream.synchronize();
+
+  auto const threshold = cudf::strings::get_offset64_threshold();
+  auto const overflow_count =
+    std::count_if(chars_sizes.begin(), chars_sizes.end(), [threshold](auto const chars_size) {
+      return chars_size >= threshold;
+    });
+  CUDF_EXPECTS(cudf::strings::is_large_strings_enabled() || overflow_count == 0,
+               "Size of output exceeds the column size limit",
+               std::overflow_error);
+
+  if (overflow_count > 0) {
+    std::vector<column_string_pairs> long_string_input;
+    std::vector<std::size_t> long_string_col_idx;
+    long_string_input.reserve(overflow_count);
+    long_string_col_idx.reserve(overflow_count);
+    for (std::size_t idx = 0; idx < input.size(); ++idx) {
+      auto const chars_size = chars_sizes[idx];
+      if (chars_size >= threshold) {
+        long_string_input.push_back(input[idx]);
+        long_string_col_idx.push_back(idx);
+      }
+    }
+
+    [[maybe_unused]] auto [new_offsets_cols, d_new_chars_sizes] =
+      make_offsets_child_column_batch_async<int64_t>(long_string_input, stream, mr);
+
+    // Update the new offsets columns.
+    // The new chars sizes should be the same as before, thus we don't need to update them.
+    for (std::size_t idx = 0; idx < long_string_col_idx.size(); ++idx) {
+      offsets_cols[long_string_col_idx[idx]] = std::move(new_offsets_cols[idx]);
+    }
+  }
+
+  std::vector<std::unique_ptr<column>> chars_cols(input.size());
+  std::vector<std::unique_ptr<column>> output(input.size());
+  for (std::size_t idx = 0; idx < input.size(); ++idx) {
+    auto const strings_count = static_cast<size_type>(input[idx].size());
+    if (strings_count == 0) {
+      output[idx] = make_empty_column(type_id::STRING);
+      continue;
+    }
+
+    auto const chars_size        = chars_sizes[idx];
+    auto const valid_count       = valid_counts[idx];
+    auto const avg_bytes_per_row = chars_size / std::max(valid_count, 1);
+    auto chars_data              = make_chars_column(offsets_cols[idx]->view(),
+                                        chars_size,
+                                        avg_bytes_per_row,
+                                        input[idx].data(),
+                                        strings_count,
+                                        stream,
+                                        mr);
+    output[idx]                  = make_strings_column(strings_count,
+                                      std::move(offsets_cols[idx]),
+                                      chars_data.release(),
+                                      strings_count - valid_count,
+                                      std::move(null_masks[idx]));
+  }
+
+  return output;
+}
+
+}  // namespace strings::detail
+
 // Create a strings-type column from vector of pointer/size pairs
 std::unique_ptr<column> make_strings_column(
   device_span<thrust::pair<char const*, size_type> const> strings,
@@ -57,6 +199,30 @@ std::unique_ptr<column> make_strings_column(
   return cudf::strings::detail::make_strings_column(strings.begin(), strings.end(), stream, mr);
 }
 
+std::vector<std::unique_ptr<column>> make_strings_column_batch(
+  std::vector<cudf::device_span<thrust::pair<char const*, size_type> const>> const& input,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+
+  return cudf::strings::detail::make_strings_column_batch(input, stream, mr);
+}
+
+namespace {
+struct string_view_to_pair {
+  string_view null_placeholder;
+  string_view_to_pair(string_view n) : null_placeholder(n) {}
+  __device__ thrust::pair<char const*, size_type> operator()(string_view const& i)
+  {
+    return (i.data() == null_placeholder.data())
+             ? thrust::pair<char const*, size_type>{nullptr, 0}
+             : thrust::pair<char const*, size_type>{i.data(), i.size_bytes()};
+  }
+};
+
+}  // namespace
+
 std::unique_ptr<column> make_strings_column(device_span<string_view const> string_views,
                                             string_view null_placeholder,
                                             rmm::cuda_stream_view stream,
diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu
index 90054e41d36..8b84c9938b8 100644
--- a/cpp/tests/strings/factories_test.cu
+++ b/cpp/tests/strings/factories_test.cu
@@ -209,6 +209,7 @@ TEST_F(StringsFactoriesTest, EmptyStringsColumn)
 
 namespace {
 using string_pair = thrust::pair<char const*, cudf::size_type>;
+
 struct string_view_to_pair {
   __device__ string_pair operator()(thrust::pair<cudf::string_view, bool> const& p)
   {
@@ -234,3 +235,109 @@ TEST_F(StringsFactoriesTest, StringPairWithNullsAndEmpty)
   auto result = cudf::make_strings_column(pairs);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), data);
 }
+
+struct StringsBatchConstructionTest : public cudf::test::BaseFixture {};
+
+TEST_F(StringsBatchConstructionTest, EmptyColumns)
+{
+  auto constexpr num_columns = 10;
+  auto const stream          = cudf::get_default_stream();
+
+  auto const d_string_pairs =
+    rmm::device_uvector<thrust::pair<char const*, cudf::size_type>>{0, stream};
+  auto const input =
+    std::vector<cudf::device_span<thrust::pair<char const*, cudf::size_type> const>>(
+      num_columns, {d_string_pairs.data(), d_string_pairs.size()});
+  auto const output = cudf::make_strings_column_batch(input, stream);
+
+  auto const expected_col = cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING});
+  for (auto const& col : output) {
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col->view(), col->view());
+  }
+}
+
+TEST_F(StringsBatchConstructionTest, AllNullsColumns)
+{
+  auto constexpr num_columns = 10;
+  auto constexpr num_rows    = 100;
+  auto const stream          = cudf::get_default_stream();
+
+  auto d_string_pairs =
+    rmm::device_uvector<thrust::pair<char const*, cudf::size_type>>{num_rows, stream};
+  thrust::uninitialized_fill_n(rmm::exec_policy(stream),
+                               d_string_pairs.data(),
+                               d_string_pairs.size(),
+                               thrust::pair<char const*, cudf::size_type>{nullptr, 0});
+  auto const input =
+    std::vector<cudf::device_span<thrust::pair<char const*, cudf::size_type> const>>(
+      num_columns, {d_string_pairs.data(), d_string_pairs.size()});
+  auto const output = cudf::make_strings_column_batch(input, stream);
+
+  auto const expected_col = cudf::make_strings_column(d_string_pairs);
+  for (auto const& col : output) {
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col->view(), col->view());
+  }
+}
+
+TEST_F(StringsBatchConstructionTest, CreateColumnsFromPairs)
+{
+  auto constexpr num_columns = 10;
+  auto constexpr num_rows    = 1000;
+  auto const stream          = cudf::get_default_stream();
+
+  std::vector<char const*> h_test_strings{"the quick brown fox jumps over the lazy dog",
+                                          "the fat cat lays next to the other accénted cat",
+                                          "a slow moving turtlé cannot catch the bird",
+                                          "which can be composéd together to form a more complete",
+                                          "thé result does not include the value in the sum in",
+                                          "",
+                                          nullptr,
+                                          "absent stop words"};
+
+  std::vector<std::size_t> offsets(h_test_strings.size() + 1, 0);
+  for (std::size_t i = 0; i < h_test_strings.size(); ++i) {
+    offsets[i + 1] = offsets[i] + (h_test_strings[i] ? strlen(h_test_strings[i]) : 0);
+  }
+
+  std::vector<char> h_buffer(offsets.back());
+  for (std::size_t i = 0; i < h_test_strings.size(); ++i) {
+    if (h_test_strings[i]) {
+      memcpy(h_buffer.data() + offsets[i], h_test_strings[i], strlen(h_test_strings[i]));
+    }
+  }
+  auto const d_test_strings = cudf::detail::make_device_uvector_sync(
+    h_buffer, cudf::get_default_stream(), cudf::get_current_device_resource_ref());
+
+  std::vector<thrust::host_vector<thrust::pair<char const*, cudf::size_type>>> h_input(num_columns);
+  for (auto& col : h_input) {
+    col.resize(num_rows);
+    for (int idx = 0; idx < num_rows; ++idx) {
+      auto const data_idx = idx % static_cast<int>(h_test_strings.size());
+      col[idx] = {
+        d_test_strings.data() + offsets[data_idx],
+        h_test_strings[data_idx] ? strlen(h_test_strings[data_idx]) : 0};
+    }
+  }
+
+  std::vector<cudf::device_span<thrust::pair<char const*, cudf::size_type> const>> input(
+    num_columns);
+  std::transform(h_input.begin(), h_input.end(), input.begin(), [](auto const& col) {
+    return cudf::device_span<thrust::pair<char const*, cudf::size_type> const>{col.data(),
+                                                                               col.size()};
+  });
+  auto const output = cudf::make_strings_column_batch(input, stream);
+
+  std::vector<std::unique_ptr<cudf::column>> expected(num_columns);
+  for (auto const& string_pairs : input) {
+    expected.emplace_back(cudf::make_strings_column(string_pairs));
+  }
+
+  for (std::size_t i = 0; i < num_columns; ++i) {
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected[i]->view(), output[i]->view());
+  }
+}
+
+TEST_F(StringsBatchConstructionTest, CreateLongStringsColumns)
+{
+  //
+}