diff --git a/apps/typegpu-docs/src/content/docs/fundamentals/pipelines.mdx b/apps/typegpu-docs/src/content/docs/fundamentals/pipelines.mdx
index 1f29da63b..b456b1693 100644
--- a/apps/typegpu-docs/src/content/docs/fundamentals/pipelines.mdx
+++ b/apps/typegpu-docs/src/content/docs/fundamentals/pipelines.mdx
@@ -325,9 +325,7 @@ It accepts the number of vertices and optionally the instance count, first verte
 After calling the method, the shader is set for execution immediately.
 
 Compute pipelines are executed using the `dispatchWorkgroups` method, which accepts the number of workgroups in each dimension.
-Unlike render pipelines, after running this method, the execution is not submitted to the GPU immediately.
-In order to do so, `root['~unstable'].flush()` needs to be run.
-However, that is usually not necessary, as it is done automatically when trying to read the result of computation.
+Similarly to render pipelines, after calling the method, the shader is set for execution immediately.
 
 ### Drawing with `drawIndexed`
 
@@ -376,14 +374,14 @@ const mainFragment = tgpu['~unstable'].fragmentFn({
 const indexBuffer = root
   .createBuffer(d.arrayOf(d.u16, 6), [0, 2, 1, 0, 3, 2])
   .$usage('index');
-  
+
 const pipeline = root['~unstable']
   .withVertex(vertex, { color: vertexLayout.attrib })
   .withFragment(mainFragment, { format: presentationFormat })
   .createPipeline()
   .withIndexBuffer(indexBuffer);
 
-  pipeline 
+  pipeline
     .with(vertexLayout, colorBuffer)
     .drawIndexed(6);
 ```
@@ -394,6 +392,8 @@ The higher-level API has several limitations, therefore another way of executing
 
 `root['~unstable'].beginRenderPass` is a method that mirrors the WebGPU API, but enriches it with a direct TypeGPU resource support.
 
+The render pass is submitted automatically to the device queue.
+
 ```ts
 root['~unstable'].beginRenderPass(
   {
@@ -407,8 +407,6 @@ root['~unstable'].beginRenderPass(
     pass.draw(3);
   },
 );
-
-root['~unstable'].flush();
 ```
 
 It is also possible to access the underlying WebGPU resources for the TypeGPU pipelines, by calling `root.unwrap(pipeline)`.
diff --git a/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx b/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx
index 1dead965a..41cd3cb31 100644
--- a/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx
+++ b/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx
@@ -117,6 +117,141 @@ The default workgroup sizes are:
 The callback is not called if the global invocation id of a thread would exceed the size in any dimension.
 :::
 
+## *batch*
+By default, TypeGPU pipelines and render passes are submitted to the GPU immediately.
+If you want to give the GPU an opportunity to better utilize its resources,
+you can use the `batch` function.
+
+The `batch` function allows you to submit multiple pipelines and render passes to the GPU in a single call.
+Under the hood, it creates a `GPUCommandEncoder`,
+records the commands from the provided callback function,
+and submits the resulting `GPUCommandBuffer` to the device.
+
+:::caution
+Read–write operations always flush the command encoder inside the batch environment (flushing means finalizing the command encoder and submitting the resulting command buffer to the GPU). Outside a batch they don't need to, because everything has already been flushed. The table below shows when a flush occurs (i.e., when a new command encoder is created). Keep this in mind when using `batch`.
+:::
+
+| Invocation                                      | Inside batch env    | Outside batch env  |
+|-------------------------------------------------|---------------------|--------------------|
+| `pipeline.draw`                                 | No Flush ❌         | Flush ✅           |
+| `pipeline.drawIndexed`                          | No Flush ❌         | Flush ✅           |
+| `pipeline.dispatchWorkgroups`                   | No Flush ❌         | Flush ✅           |
+| `pipeline.withPerformanceCallback`              | No Flush ❌ / ⚠️ ⬇️ | Flush ✅           |
+| `pipeline.withTimestampWrites`                  | No Flush ❌         | Flush ✅           |
+| `beginRenderPass`                               | No Flush ❌         | Flush ✅           |
+| `buffer.write`                                  | Flush ✅            | No Flush ❌        |
+| `buffer.writePartial`                           | Flush ✅            | No Flush ❌        |
+| `buffer.read`                                   | Flush ✅            | No Flush ❌        |
+| `querySet.resolve`                              | No Flush ❌         | No Flush ❌        |
+| `querySet.read`                                 | Flush ✅            | Flush ✅           |
+| `pipeline containing console.log`               | Flush ✅            | Flush ✅           |
+| `prepareDispatch().dispatch`                    | No Flush ❌         | Flush ✅           |
+| `nested batch`                                  | Flush ✅            | N/A                |
+
+
+:::caution
+When you call a pipeline with a performance callback, the callback is invoked at the end of the batch. The timestamps themselves are not affected by the batching. They are still written at the beginning and/or end of the associated pipeline/render pass.
+:::
+
+:::caution
+`querySet.resolve` itself never flushes.
+- If you need to read from it, `querySet.read` will handle the flush.
+- If you use it on the GPU, another function will flush the existing `commandEncoder` with the `querySet.resolve` command.
+This works because we never create a new `commandEncoder` unless it's necessary.
+:::
+
+### Example
+```ts twoslash
+import tgpu from 'typegpu';
+import * as d from 'typegpu/data';
+
+const entryFn = tgpu['~unstable'].computeFn({ workgroupSize: [7] })(() => {});
+const vertexFn = tgpu['~unstable'].vertexFn({
+  out: { pos: d.builtin.position },
+})(() => {
+  return { pos: d.vec4f() };
+});
+const fragmentFn = tgpu['~unstable'].fragmentFn({
+  out: d.vec4f,
+})(() => d.vec4f());
+
+const root = await tgpu.init();
+
+const renderPipeline = root['~unstable']
+  .withVertex(vertexFn, {})
+  .withFragment(fragmentFn, { format: 'rgba8unorm' })
+  .createPipeline();
+
+const computePipeline = root['~unstable']
+  .withCompute(entryFn)
+  .createPipeline();
+
+const buffer = root.createBuffer(d.arrayOf(d.f32, 1024));
+
+// ---cut---
+const render = () => {
+  computePipeline.dispatchWorkgroups(7, 7, 7);
+  renderPipeline.draw(777);
+  // more operations...
+
+  buffer.write(Array.from({ length: 1024 }, () => Math.random()));
+  // force flush caused by write, new command encoder
+};
+
+root['~unstable'].batch(render);
+```
+
+:::note
+The batch callback must be synchronous.
+While this constraint may appear restrictive, the recommended approach is to divide
+the batch into multiple separate batches if asynchronous operations are required.
+:::
+
+### Nested batches
+Nested batches flush the existing command encoder and create a new one. Performance callbacks registered inside a nested batch are invoked at its end. For example:
+
+
+```ts twoslash
+import tgpu from 'typegpu';
+import * as d from 'typegpu/data';
+
+const vertexFn = tgpu['~unstable'].vertexFn({
+  out: { pos: d.builtin.position },
+})(() => {
+  return { pos: d.vec4f() };
+});
+const fragmentFn = tgpu['~unstable'].fragmentFn({
+  out: d.vec4f,
+})(() => d.vec4f());
+
+const root = await tgpu.init();
+
+const callback = () => {};
+
+const pipeline = root['~unstable']
+  .withVertex(vertexFn, {})
+  .withFragment(fragmentFn, { format: 'rgba8unorm' })
+  .createPipeline();
+
+const renderPipelineWithPerformanceCallback1 = pipeline.withPerformanceCallback(
+  callback,
+);
+const renderPipelineWithPerformanceCallback2 = pipeline.withPerformanceCallback(
+  callback,
+);
+
+// ---cut---
+root['~unstable'].batch(() => {
+  renderPipelineWithPerformanceCallback1.draw(1882);
+  root['~unstable'].batch(() => {
+    renderPipelineWithPerformanceCallback2.draw(1882);
+  });
+  // flush of the command encoder occurs, then callback2 is invoked
+});
+// flush of the (empty) command encoder occurs, then callback1 is invoked
+```
+
+
 ## *console.log*
 
 Yes, you read that correctly, TypeGPU implements logging to the console on the GPU!
@@ -135,7 +270,7 @@ const compute = prepareDispatch(root, () => {
   console.log('Call number', callCountMutable.$);
 });
 
-compute.dispatch(); 
+compute.dispatch();
 compute.dispatch();
 
 // Eventually...
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1-typegpu.ts
index 5059d15ea..03b648e4c 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1-typegpu.ts
@@ -58,6 +58,4 @@ onFrame(() => {
     ],
     vertexCount: 3,
   });
-
-  root.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1side-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1side-typegpu.ts
index 1e8b1f92e..405268abc 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1side-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step1side-typegpu.ts
@@ -77,6 +77,4 @@ onFrame(() => {
     ],
     vertexCount: 3,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step2-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step2-typegpu.ts
index ffa017146..97dadd78c 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step2-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step2-typegpu.ts
@@ -106,6 +106,4 @@ onFrame(() => {
     ],
     vertexCount: 3,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step3-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step3-typegpu.ts
index 5d35df928..3474c90e4 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step3-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step3-typegpu.ts
@@ -115,6 +115,4 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step4-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step4-typegpu.ts
index 1ddddd53f..751d7d2ab 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step4-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step4-typegpu.ts
@@ -124,6 +124,4 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step5-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step5-typegpu.ts
index 6b3eb6132..6744b4496 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step5-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step5-typegpu.ts
@@ -149,6 +149,4 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step6-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step6-typegpu.ts
index d65dbbde9..b168ca079 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step6-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step6-typegpu.ts
@@ -159,6 +159,4 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step7-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step7-typegpu.ts
index 0549b3f4d..38c87def8 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step7-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step7-typegpu.ts
@@ -172,6 +172,4 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step8-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step8-typegpu.ts
index 479752152..c6f424e27 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step8-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step8-typegpu.ts
@@ -181,6 +181,4 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step9-typegpu.ts b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step9-typegpu.ts
index 06dae7637..628830e91 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step9-typegpu.ts
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/code/step9-typegpu.ts
@@ -239,8 +239,6 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
 
 const options = {
diff --git a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/index.mdx b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/index.mdx
index 9216a8a31..0833983b9 100644
--- a/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/index.mdx
+++ b/apps/typegpu-docs/src/content/docs/tutorials/triangle-to-boids/index.mdx
@@ -172,31 +172,6 @@ function frame() {
   requestAnimationFrame(frame);
 }
 
-frame();
-```
-<Aside type="danger" title="The code above draws nothing!">
-  The `renderPipeline.execute(...)` method encodes all the necessary commands, but they will not be queued until we either read from a buffer or call `root.flush()`.
-</Aside>
-
-```diff lang=ts
-function frame() {
-  renderPipeline.execute({
-    colorAttachments: [
-      {
-        view: context.getCurrentTexture().createView(),
-        clearValue: [0, 0, 0, 0],
-        loadOp: 'clear',
-        storeOp: 'store',
-      },
-    ],
-    vertexCount: 3,
-  });
-
-+  root.flush();
-
-  requestAnimationFrame(frame);
-}
-
 frame();
 ```
 
@@ -503,8 +478,6 @@ Let's explore both options.
       vertexCount: 3,
 +    instanceCount: triangleAmount,
     });
-
-    runtime.flush();
   });
   ```
 
@@ -562,8 +535,6 @@ Let's explore both options.
       vertexCount: 3,
 +    instanceCount: triangleAmount,
     });
-
-    runtime.flush();
   });
   ```
 
@@ -610,8 +581,6 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
 ```
 
@@ -812,8 +781,6 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
 ```
 
@@ -946,8 +913,6 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
 ```
 
@@ -1127,8 +1092,6 @@ onFrame(() => {
     vertexCount: 3,
     instanceCount: triangleAmount,
   });
-
-  runtime.flush();
 });
 ```
 
@@ -1500,4 +1463,3 @@ import step9webgpu from 'code/step9-webgpu.ts?raw';
 
 Congratulations! You've successfully implemented the boids flocking algorithm in WebGPU using TypeGPU.
 Along the way, you learned about creating and using a TypeGPU runtime, writing shader code, managing buffers, creating pipelines, and using slots. For more information, refer to the TypeGPU documentation. Thank you for following along and happy coding!
-
diff --git a/apps/typegpu-docs/src/examples/image-processing/blur/index.ts b/apps/typegpu-docs/src/examples/image-processing/blur/index.ts
index 6cd1c8973..1c9fbdf20 100644
--- a/apps/typegpu-docs/src/examples/image-processing/blur/index.ts
+++ b/apps/typegpu-docs/src/examples/image-processing/blur/index.ts
@@ -203,7 +203,6 @@ function render() {
     loadOp: 'clear',
     storeOp: 'store',
   }).draw(3);
-  root['~unstable'].flush();
 }
 render();
 
diff --git a/apps/typegpu-docs/src/examples/rendering/3d-fish/index.ts b/apps/typegpu-docs/src/examples/rendering/3d-fish/index.ts
index 0e3c7861e..2ed70ef25 100644
--- a/apps/typegpu-docs/src/examples/rendering/3d-fish/index.ts
+++ b/apps/typegpu-docs/src/examples/rendering/3d-fish/index.ts
@@ -256,57 +256,57 @@ function frame(timestamp: DOMHighResTimeStamp) {
   lastTimestamp = timestamp;
   cameraBuffer.write(camera);
 
-  computePipeline
-    .with(computeBindGroups[odd ? 1 : 0])
-    .dispatchWorkgroups(p.fishAmount / p.workGroupSize);
-
-  renderPipeline
-    .withColorAttachment({
-      view: context.getCurrentTexture().createView(),
-      clearValue: [
-        p.backgroundColor.x,
-        p.backgroundColor.y,
-        p.backgroundColor.z,
-        1,
-      ],
-      loadOp: 'clear',
-      storeOp: 'store',
-    })
-    .withDepthStencilAttachment({
-      view: depthTexture.createView(),
-      depthClearValue: 1,
-      depthLoadOp: 'clear',
-      depthStoreOp: 'store',
-    })
-    .with(modelVertexLayout, oceanFloorModel.vertexBuffer)
-    .with(renderInstanceLayout, oceanFloorDataBuffer)
-    .with(renderOceanFloorBindGroup)
-    .draw(oceanFloorModel.polygonCount, 1);
-
-  renderPipeline
-    .withColorAttachment({
-      view: context.getCurrentTexture().createView(),
-      clearValue: [
-        p.backgroundColor.x,
-        p.backgroundColor.y,
-        p.backgroundColor.z,
-        1,
-      ],
-      loadOp: 'load',
-      storeOp: 'store',
-    })
-    .withDepthStencilAttachment({
-      view: depthTexture.createView(),
-      depthClearValue: 1,
-      depthLoadOp: 'load',
-      depthStoreOp: 'store',
-    })
-    .with(modelVertexLayout, fishModel.vertexBuffer)
-    .with(renderInstanceLayout, fishDataBuffers[odd ? 1 : 0])
-    .with(renderFishBindGroups[odd ? 1 : 0])
-    .draw(fishModel.polygonCount, p.fishAmount);
-
-  root['~unstable'].flush();
+  root['~unstable'].batch(() => {
+    computePipeline
+      .with(computeBindGroupLayout, computeBindGroups[odd ? 1 : 0])
+      .dispatchWorkgroups(p.fishAmount / p.workGroupSize);
+
+    renderPipeline
+      .withColorAttachment({
+        view: context.getCurrentTexture().createView(),
+        clearValue: [
+          p.backgroundColor.x,
+          p.backgroundColor.y,
+          p.backgroundColor.z,
+          1,
+        ],
+        loadOp: 'clear',
+        storeOp: 'store',
+      })
+      .withDepthStencilAttachment({
+        view: depthTexture.createView(),
+        depthClearValue: 1,
+        depthLoadOp: 'clear',
+        depthStoreOp: 'store',
+      })
+      .with(modelVertexLayout, oceanFloorModel.vertexBuffer)
+      .with(renderInstanceLayout, oceanFloorDataBuffer)
+      .with(renderBindGroupLayout, renderOceanFloorBindGroup)
+      .draw(oceanFloorModel.polygonCount, 1);
+
+    renderPipeline
+      .withColorAttachment({
+        view: context.getCurrentTexture().createView(),
+        clearValue: [
+          p.backgroundColor.x,
+          p.backgroundColor.y,
+          p.backgroundColor.z,
+          1,
+        ],
+        loadOp: 'load',
+        storeOp: 'store',
+      })
+      .withDepthStencilAttachment({
+        view: depthTexture.createView(),
+        depthClearValue: 1,
+        depthLoadOp: 'load',
+        depthStoreOp: 'store',
+      })
+      .with(modelVertexLayout, fishModel.vertexBuffer)
+      .with(renderInstanceLayout, fishDataBuffers[odd ? 1 : 0])
+      .with(renderBindGroupLayout, renderFishBindGroups[odd ? 1 : 0])
+      .draw(fishModel.polygonCount, p.fishAmount);
+  });
 
   requestAnimationFrame(frame);
 }
diff --git a/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts b/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts
index e313ffd86..a192c30be 100644
--- a/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts
+++ b/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts
@@ -310,8 +310,6 @@ export class IcosphereGenerator {
       .with(bindGroup)
       .dispatchWorkgroups(xGroups, yGroups, 1);
 
-    this.root['~unstable'].flush();
-
     return nextBuffer;
   }
 
diff --git a/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/index.ts b/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/index.ts
index 2808485dd..c3e438904 100644
--- a/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/index.ts
+++ b/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/index.ts
@@ -315,15 +315,13 @@ function render() {
     .with(renderBindGroup)
     .with(textureBindGroup)
     .draw(vertexBuffer.dataType.elementCount);
-
-  root['~unstable'].flush();
 }
 
 function loop() {
   if (exampleDestroyed) {
     return;
   }
-  render();
+  root['~unstable'].batch(render);
   requestAnimationFrame(loop);
 }
 
diff --git a/apps/typegpu-docs/src/examples/rendering/simple-shadow/index.ts b/apps/typegpu-docs/src/examples/rendering/simple-shadow/index.ts
index 839b3f8a8..33c336b83 100644
--- a/apps/typegpu-docs/src/examples/rendering/simple-shadow/index.ts
+++ b/apps/typegpu-docs/src/examples/rendering/simple-shadow/index.ts
@@ -352,7 +352,6 @@ function render() {
       }
     },
   );
-  root['~unstable'].flush();
 }
 frameId = requestAnimationFrame(render);
 
diff --git a/apps/typegpu-docs/src/examples/rendering/two-boxes/index.ts b/apps/typegpu-docs/src/examples/rendering/two-boxes/index.ts
index fb21379bd..6f8d25b94 100644
--- a/apps/typegpu-docs/src/examples/rendering/two-boxes/index.ts
+++ b/apps/typegpu-docs/src/examples/rendering/two-boxes/index.ts
@@ -303,7 +303,6 @@ function render() {
   drawObject(cubeBuffer, bindGroup, 36, 'clear');
   drawObject(secondCubeBuffer, secondBindGroup, 36, 'load');
   drawObject(planeBuffer, planeBindGroup, 6, 'load');
-  root['~unstable'].flush();
 }
 
 function frame() {
diff --git a/apps/typegpu-docs/src/examples/simulation/boids-next/index.ts b/apps/typegpu-docs/src/examples/simulation/boids-next/index.ts
index d67e0e38f..37f1073a8 100644
--- a/apps/typegpu-docs/src/examples/simulation/boids-next/index.ts
+++ b/apps/typegpu-docs/src/examples/simulation/boids-next/index.ts
@@ -273,19 +273,21 @@ function frame() {
 
   even = !even;
 
-  computePipeline
-    .with(computeBindGroups[even ? 0 : 1])
-    .dispatchWorkgroups(triangleAmount);
-
-  renderPipeline
-    .withColorAttachment({
-      view: context.getCurrentTexture().createView(),
-      clearValue: [1, 1, 1, 1],
-      loadOp: 'clear' as const,
-      storeOp: 'store' as const,
-    })
-    .with(instanceLayout, trianglePosBuffers[even ? 1 : 0])
-    .draw(3, triangleAmount);
+  root['~unstable'].batch(() => {
+    computePipeline
+      .with(computeBindGroupLayout, computeBindGroups[even ? 0 : 1])
+      .dispatchWorkgroups(triangleAmount);
+
+    renderPipeline
+      .withColorAttachment({
+        view: context.getCurrentTexture().createView(),
+        clearValue: [1, 1, 1, 1],
+        loadOp: 'clear' as const,
+        storeOp: 'store' as const,
+      })
+      .with(instanceLayout, trianglePosBuffers[even ? 1 : 0])
+      .draw(3, triangleAmount);
+  });
 
   requestAnimationFrame(frame);
 }
diff --git a/apps/typegpu-docs/src/examples/simulation/confetti/index.ts b/apps/typegpu-docs/src/examples/simulation/confetti/index.ts
index c8fbb6d8a..859a6a243 100644
--- a/apps/typegpu-docs/src/examples/simulation/confetti/index.ts
+++ b/apps/typegpu-docs/src/examples/simulation/confetti/index.ts
@@ -210,16 +210,18 @@ onFrame((dt) => {
   deltaTime.write(dt);
   aspectRatio.write(canvas.width / canvas.height);
 
-  computePipeline.dispatchWorkgroups(PARTICLE_AMOUNT);
-
-  renderPipeline
-    .withColorAttachment({
-      view: context.getCurrentTexture().createView(),
-      clearValue: [0, 0, 0, 0],
-      loadOp: 'clear' as const,
-      storeOp: 'store' as const,
-    })
-    .draw(4, PARTICLE_AMOUNT);
+  root['~unstable'].batch(() => {
+    computePipeline.dispatchWorkgroups(PARTICLE_AMOUNT);
+
+    renderPipeline
+      .withColorAttachment({
+        view: context.getCurrentTexture().createView(),
+        clearValue: [0, 0, 0, 0],
+        loadOp: 'clear' as const,
+        storeOp: 'store' as const,
+      })
+      .draw(4, PARTICLE_AMOUNT);
+  });
 });
 
 // example controls and cleanup
diff --git a/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts b/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts
index d2cf6ae20..fe86bc54f 100644
--- a/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts
+++ b/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts
@@ -595,10 +595,12 @@ onFrame((deltaTime) => {
   msSinceLastTick += deltaTime;
 
   if (msSinceLastTick >= timestep) {
-    for (let i = 0; i < stepsPerTick; ++i) {
-      tick();
-    }
-    primary.render();
+    root['~unstable'].batch(() => {
+      for (let i = 0; i < stepsPerTick; ++i) {
+        tick();
+      }
+      primary.render();
+    });
     msSinceLastTick -= timestep;
   }
 });
diff --git a/apps/typegpu-docs/src/examples/simulation/fluid-with-atomics/index.ts b/apps/typegpu-docs/src/examples/simulation/fluid-with-atomics/index.ts
index 0a51e12ac..957f51503 100644
--- a/apps/typegpu-docs/src/examples/simulation/fluid-with-atomics/index.ts
+++ b/apps/typegpu-docs/src/examples/simulation/fluid-with-atomics/index.ts
@@ -372,7 +372,6 @@ function resetGameData() {
         storeOp: 'store' as const,
       })
       .draw(4, options.size ** 2);
-
     currentStateBuffer.copyFrom(nextState.buffer);
   };
 
@@ -397,7 +396,7 @@ function resetGameData() {
 
   createSampleScene();
   applyDrawCanvas();
-  render();
+  root['~unstable'].batch(render);
 }
 
 let isDrawing = false;
diff --git a/apps/typegpu-docs/src/examples/simulation/gravity/index.ts b/apps/typegpu-docs/src/examples/simulation/gravity/index.ts
index 9dfdf0d73..753f00a5d 100644
--- a/apps/typegpu-docs/src/examples/simulation/gravity/index.ts
+++ b/apps/typegpu-docs/src/examples/simulation/gravity/index.ts
@@ -199,7 +199,7 @@ function frame(timestamp: DOMHighResTimeStamp) {
     passed: Math.min((timestamp - lastTimestamp) / 1000, 0.1),
   });
   lastTimestamp = timestamp;
-  render();
+  root['~unstable'].batch(render);
   requestAnimationFrame(frame);
 }
 requestAnimationFrame(frame);
diff --git a/apps/typegpu-docs/src/examples/tests/texture-test/index.ts b/apps/typegpu-docs/src/examples/tests/texture-test/index.ts
index 8b97f52ba..efb7ea619 100644
--- a/apps/typegpu-docs/src/examples/tests/texture-test/index.ts
+++ b/apps/typegpu-docs/src/examples/tests/texture-test/index.ts
@@ -87,7 +87,6 @@ function render() {
       storeOp: 'store',
     })
     .draw(3);
-  root['~unstable'].flush();
   requestAnimationFrame(render);
 }
 requestAnimationFrame(render);
diff --git a/packages/typegpu/src/core/buffer/buffer.ts b/packages/typegpu/src/core/buffer/buffer.ts
index 61ded5373..6603e3fa7 100644
--- a/packages/typegpu/src/core/buffer/buffer.ts
+++ b/packages/typegpu/src/core/buffer/buffer.ts
@@ -330,8 +330,10 @@ class TgpuBufferImpl<TData extends AnyData> implements TgpuBuffer<TData> {
       this._hostBuffer = new ArrayBuffer(size);
     }
 
-    // Flushing any commands yet to be encoded.
-    this._group.flush();
+    if (this._group[$internal].batchState.ongoingBatch) {
+      // Flushing any commands yet to be encoded. This can happen only inside a batch.
+      this._group[$internal].flush();
+    }
 
     this._writeToTarget(this._hostBuffer, data);
     device.queue.writeBuffer(gpuBuffer, 0, this._hostBuffer, 0, size);
@@ -351,6 +353,11 @@ class TgpuBufferImpl<TData extends AnyData> implements TgpuBuffer<TData> {
         mappedView.set(instruction.data, instruction.data.byteOffset);
       }
     } else {
+      if (this._group[$internal].batchState.ongoingBatch) {
+        // Flushing any commands yet to be encoded. This can happen only inside a batch.
+        this._group[$internal].flush();
+      }
+
       for (const instruction of instructions) {
         device.queue.writeBuffer(
           gpuBuffer,
@@ -373,7 +380,7 @@ class TgpuBufferImpl<TData extends AnyData> implements TgpuBuffer<TData> {
     }
 
     // Flushing any commands yet to be encoded.
-    this._group.flush();
+    this._group[$internal].flush();
 
     const encoder = device.createCommandEncoder();
     encoder.clearBuffer(gpuBuffer);
@@ -386,13 +393,15 @@ class TgpuBufferImpl<TData extends AnyData> implements TgpuBuffer<TData> {
     }
 
     const size = sizeOf(this.dataType);
-    const encoder = this._group.commandEncoder;
+    const encoder = this._group[$internal].commandEncoder;
     encoder.copyBufferToBuffer(srcBuffer.buffer, 0, this.buffer, 0, size);
   }
 
   async read(): Promise<Infer<TData>> {
-    // Flushing any commands yet to be encoded.
-    this._group.flush();
+    if (this._group[$internal].batchState.ongoingBatch) {
+      // Flushing any commands yet to be encoded. This can happen only inside a batch.
+      this._group[$internal].flush();
+    }
 
     const gpuBuffer = this.buffer;
     const device = this._group.device;
diff --git a/packages/typegpu/src/core/pipeline/computePipeline.ts b/packages/typegpu/src/core/pipeline/computePipeline.ts
index 7abe63512..458648b0c 100644
--- a/packages/typegpu/src/core/pipeline/computePipeline.ts
+++ b/packages/typegpu/src/core/pipeline/computePipeline.ts
@@ -190,7 +190,9 @@ class TgpuComputePipelineImpl implements TgpuComputePipeline {
       ...setupTimestampWrites(this._priors, branch),
     };
 
-    const pass = branch.commandEncoder.beginComputePass(passDescriptor);
+    const pass = branch[$internal].commandEncoder.beginComputePass(
+      passDescriptor,
+    );
 
     pass.setPipeline(memo.pipeline);
 
@@ -217,15 +219,22 @@ class TgpuComputePipelineImpl implements TgpuComputePipeline {
     pass.dispatchWorkgroups(x, y, z);
     pass.end();
 
-    if (memo.logResources) {
-      logDataFromGPU(memo.logResources);
+    const hasPerformanceCallback = !!this._priors.performanceCallback;
+    const isOngoingBatch = branch[$internal].batchState.ongoingBatch;
+
+    if (hasPerformanceCallback && isOngoingBatch) {
+      branch[$internal].batchState.performanceCallbacks.push(() =>
+        triggerPerformanceCallback({ root: branch, priors: this._priors })
+      );
+    } else if (!isOngoingBatch) {
+      branch[$internal].flush();
+      if (hasPerformanceCallback) {
+        triggerPerformanceCallback({ root: branch, priors: this._priors });
+      }
     }
 
-    if (this._priors.performanceCallback) {
-      triggerPerformanceCallback({
-        root: branch,
-        priors: this._priors,
-      });
+    if (memo.logResources) {
+      logDataFromGPU(memo.logResources);
     }
   }
 
diff --git a/packages/typegpu/src/core/pipeline/renderPipeline.ts b/packages/typegpu/src/core/pipeline/renderPipeline.ts
index 77613d581..d217a6826 100644
--- a/packages/typegpu/src/core/pipeline/renderPipeline.ts
+++ b/packages/typegpu/src/core/pipeline/renderPipeline.ts
@@ -532,7 +532,9 @@ class TgpuRenderPipelineImpl implements TgpuRenderPipeline {
       }
     }
 
-    const pass = branch.commandEncoder.beginRenderPass(renderPassDescriptor);
+    const pass = branch[$internal].commandEncoder.beginRenderPass(
+      renderPassDescriptor,
+    );
 
     pass.setPipeline(memo.pipeline);
 
@@ -589,16 +591,23 @@ class TgpuRenderPipelineImpl implements TgpuRenderPipeline {
 
     pass.end();
 
+    const hasPerformanceCallback = !!internals.priors.performanceCallback;
+    const isOngoingBatch = branch[$internal].batchState.ongoingBatch;
+
+    if (hasPerformanceCallback && isOngoingBatch) {
+      branch[$internal].batchState.performanceCallbacks.push(() =>
+        triggerPerformanceCallback({ root: branch, priors: internals.priors })
+      );
+    } else if (!isOngoingBatch) {
+      branch[$internal].flush();
+      if (hasPerformanceCallback) {
+        triggerPerformanceCallback({ root: branch, priors: internals.priors });
+      }
+    }
+
     if (logResources) {
       logDataFromGPU(logResources);
     }
-
-    internals.priors.performanceCallback
-      ? triggerPerformanceCallback({
-        root: branch,
-        priors: internals.priors,
-      })
-      : branch.flush();
   }
 
   drawIndexed(
@@ -641,12 +650,19 @@ class TgpuRenderPipelineImpl implements TgpuRenderPipeline {
 
     pass.end();
 
-    internals.priors.performanceCallback
-      ? triggerPerformanceCallback({
-        root: branch,
-        priors: internals.priors,
-      })
-      : branch.flush();
+    const hasPerformanceCallback = !!internals.priors.performanceCallback;
+    const isOngoingBatch = branch[$internal].batchState.ongoingBatch;
+
+    if (hasPerformanceCallback && isOngoingBatch) {
+      branch[$internal].batchState.performanceCallbacks.push(() =>
+        triggerPerformanceCallback({ root: branch, priors: internals.priors })
+      );
+    } else if (!isOngoingBatch) {
+      branch[$internal].flush();
+      if (hasPerformanceCallback) {
+        triggerPerformanceCallback({ root: branch, priors: internals.priors });
+      }
+    }
   }
 }
 
diff --git a/packages/typegpu/src/core/pipeline/timeable.ts b/packages/typegpu/src/core/pipeline/timeable.ts
index 18d6c6956..46aac3f4e 100644
--- a/packages/typegpu/src/core/pipeline/timeable.ts
+++ b/packages/typegpu/src/core/pipeline/timeable.ts
@@ -151,7 +151,12 @@ export function triggerPerformanceCallback({
     );
   }
 
-  root.commandEncoder.resolveQuerySet(
+  // we don't want to override content of unavailable querySet
+  if (!querySet.available) {
+    return;
+  }
+
+  root[$internal].commandEncoder.resolveQuerySet(
     root.unwrap(querySet),
     0,
     querySet.count,
@@ -159,12 +164,7 @@ export function triggerPerformanceCallback({
     0,
   );
 
-  root.flush();
-  root.device.queue.onSubmittedWorkDone().then(async () => {
-    if (!querySet.available) {
-      return;
-    }
-    const result = await querySet.read();
+  querySet.read().then(async (result) => {
     const start =
       result[priors.timestampWrites?.beginningOfPassWriteIndex ?? 0];
     const end = result[priors.timestampWrites?.endOfPassWriteIndex ?? 1];
diff --git a/packages/typegpu/src/core/querySet/querySet.ts b/packages/typegpu/src/core/querySet/querySet.ts
index c9bd7c83d..e30f4fce0 100644
--- a/packages/typegpu/src/core/querySet/querySet.ts
+++ b/packages/typegpu/src/core/querySet/querySet.ts
@@ -122,7 +122,7 @@ class TgpuQuerySetImpl<T extends GPUQueryType> implements TgpuQuerySet<T> {
       throw new Error('This QuerySet is busy resolving or reading.');
     }
 
-    const commandEncoder = this._group.device.createCommandEncoder();
+    const commandEncoder = this._group[$internal].commandEncoder;
     commandEncoder.resolveQuerySet(
       this.querySet,
       0,
@@ -130,18 +130,16 @@ class TgpuQuerySetImpl<T extends GPUQueryType> implements TgpuQuerySet<T> {
       this[$internal].resolveBuffer,
       0,
     );
-    this._group.device.queue.submit([commandEncoder.finish()]);
   }
 
   async read(): Promise<bigint[]> {
-    this._group.flush();
     if (!this._resolveBuffer) {
       throw new Error('QuerySet must be resolved before reading.');
     }
 
     this._available = false;
     try {
-      const commandEncoder = this._group.device.createCommandEncoder();
+      const commandEncoder = this._group[$internal].commandEncoder;
       commandEncoder.copyBufferToBuffer(
         this[$internal].resolveBuffer,
         0,
@@ -149,8 +147,7 @@ class TgpuQuerySetImpl<T extends GPUQueryType> implements TgpuQuerySet<T> {
         0,
         this.count * BigUint64Array.BYTES_PER_ELEMENT,
       );
-      this._group.device.queue.submit([commandEncoder.finish()]);
-      await this._group.device.queue.onSubmittedWorkDone();
+      this._group[$internal].flush();
 
       const readBuffer = this[$internal].readBuffer;
       await readBuffer.mapAsync(GPUMapMode.READ);
diff --git a/packages/typegpu/src/core/root/init.ts b/packages/typegpu/src/core/root/init.ts
index a54640805..ca3ad2b89 100644
--- a/packages/typegpu/src/core/root/init.ts
+++ b/packages/typegpu/src/core/root/init.ts
@@ -95,6 +95,7 @@ import type {
   ExperimentalTgpuRoot,
   RenderPass,
   TgpuRoot,
+  TgpuRootInternals,
   WithBinding,
   WithCompute,
   WithFragment,
@@ -259,6 +260,7 @@ interface Disposable {
  */
 class TgpuRootImpl extends WithBindingImpl
   implements TgpuRoot, ExperimentalTgpuRoot {
+  readonly [$internal]: TgpuRootInternals;
   '~unstable': Omit<ExperimentalTgpuRoot, keyof TgpuRoot>;
 
   private _disposables: Disposable[] = [];
@@ -270,12 +272,6 @@ class TgpuRootImpl extends WithBindingImpl
     key.unwrap(this)
   );
 
-  private _commandEncoder: GPUCommandEncoder | null = null;
-
-  [$internal]: {
-    logOptions: LogGeneratorOptions;
-  };
-
   constructor(
     public readonly device: GPUDevice,
     public readonly nameRegistrySetting: 'random' | 'strict',
@@ -286,17 +282,70 @@ class TgpuRootImpl extends WithBindingImpl
     super(() => this, []);
 
     this['~unstable'] = this;
+
+    let commandEncoder: GPUCommandEncoder | undefined;
     this[$internal] = {
       logOptions,
+      batchState: {
+        ongoingBatch: false,
+        performanceCallbacks: [],
+      },
+
+      get commandEncoder() {
+        commandEncoder ??= device.createCommandEncoder();
+
+        return commandEncoder;
+      },
+
+      flush() {
+        if (!commandEncoder) {
+          return;
+        }
+
+        device.queue.submit([commandEncoder.finish()]);
+        commandEncoder = undefined;
+      },
     };
   }
 
-  get commandEncoder() {
-    if (!this._commandEncoder) {
-      this._commandEncoder = this.device.createCommandEncoder();
+  batch<T>(
+    ...args: T extends Promise<unknown> ? [
+        'Batch operations must be synchronous. Async functions are not allowed. Use synchronous callbacks only.',
+      ]
+      : [callback: () => T]
+  ) {
+    const [callback] = args as [() => T];
+
+    const { batchState } = this[$internal];
+    const isOuterBatch = !batchState.ongoingBatch;
+    const performanceCallbackIdx = batchState.performanceCallbacks.length;
+
+    if (isOuterBatch) {
+      batchState.ongoingBatch = true;
     }
 
-    return this._commandEncoder;
+    try {
+      callback();
+    } finally {
+      this[$internal].flush();
+
+      for (
+        const performanceCallback of batchState.performanceCallbacks.slice(
+          performanceCallbackIdx,
+        )
+      ) {
+        performanceCallback();
+      }
+
+      batchState.performanceCallbacks = batchState.performanceCallbacks.slice(
+        0,
+        performanceCallbackIdx,
+      );
+
+      if (isOuterBatch) {
+        batchState.ongoingBatch = false;
+      }
+    }
   }
 
   get enabledFeatures() {
@@ -515,7 +564,7 @@ class TgpuRootImpl extends WithBindingImpl
     descriptor: GPURenderPassDescriptor,
     callback: (pass: RenderPass) => void,
   ): void {
-    const pass = this.commandEncoder.beginRenderPass(descriptor);
+    const pass = this[$internal].commandEncoder.beginRenderPass(descriptor);
 
     const bindGroups = new Map<
       TgpuBindGroupLayout,
@@ -662,15 +711,9 @@ class TgpuRootImpl extends WithBindingImpl
     });
 
     pass.end();
-  }
-
-  flush() {
-    if (!this._commandEncoder) {
-      return;
+    if (!this[$internal].batchState.ongoingBatch) {
+      this[$internal].flush();
     }
-
-    this.device.queue.submit([this._commandEncoder.finish()]);
-    this._commandEncoder = null;
   }
 }
 
diff --git a/packages/typegpu/src/core/root/rootTypes.ts b/packages/typegpu/src/core/root/rootTypes.ts
index 66f734121..80f64507d 100644
--- a/packages/typegpu/src/core/root/rootTypes.ts
+++ b/packages/typegpu/src/core/root/rootTypes.ts
@@ -19,12 +19,12 @@ import type {
   IsValidStorageSchema,
   IsValidUniformSchema,
 } from '../../shared/repr.ts';
-import { $internal } from '../../shared/symbols.ts';
 import type {
   Mutable,
   OmitProps,
   Prettify,
 } from '../../shared/utilityTypes.ts';
+import { $internal } from '../../shared/symbols.ts';
 import type {
   ExtractBindGroupInputFromLayout,
   TgpuBindGroup,
@@ -485,10 +485,6 @@ export type ValidateUniformSchema<TData extends AnyData> =
     : TData;
 
 export interface TgpuRoot extends Unwrapper {
-  [$internal]: {
-    logOptions: LogGeneratorOptions;
-  };
-
   /**
    * The GPU device associated with this root.
    */
@@ -667,19 +663,40 @@ export interface TgpuRoot extends Unwrapper {
    */
   destroy(): void;
 
+  readonly [$internal]: TgpuRootInternals;
   '~unstable': Omit<ExperimentalTgpuRoot, keyof TgpuRoot>;
 }
+export interface TgpuRootInternals {
+  logOptions: LogGeneratorOptions;
+  /**
+   * Batching state. While `ongoingBatch` is true, pipelines skip their automatic flush, so commands are not submitted to the device queue right away.
+   * `performanceCallbacks` holds the callbacks deferred during a batch, to be invoked after the batch has flushed.
+   */
+  readonly batchState: {
+    ongoingBatch: boolean;
+    performanceCallbacks: (() => void)[];
+  };
+  /**
+   * The current command encoder, created lazily on first access and reused
+   * until the next `flush()`. It holds the same value throughout a `batch()`
+   * invocation unless something flushes in between, e.g. a nested `batch()` or a performance callback.
+   * For a single `draw()`, `drawIndexed()` or `dispatchWorkgroups()` call outside a batch,
+   * the getter effectively creates a single-use command encoder.
+   */
+  readonly commandEncoder: GPUCommandEncoder;
+  /**
+   * Submits all commands recorded on the current command encoder to the
+   * device queue and discards the encoder; does nothing if no encoder was created.
+   * When there is no ongoing batch, it is called after each `draw()`, `drawIndexed()` or `dispatchWorkgroups()` call.
+   */
+  flush(): void;
+}
 
 export interface ExperimentalTgpuRoot extends TgpuRoot, WithBinding {
   readonly nameRegistrySetting: 'strict' | 'random';
   readonly shaderGenerator?:
     | ShaderGenerator
     | undefined;
-  /**
-   * The current command encoder. This property will
-   * hold the same value until `flush()` is called.
-   */
-  readonly commandEncoder: GPUCommandEncoder;
 
   createTexture<
     TWidth extends number,
@@ -720,8 +737,21 @@ export interface ExperimentalTgpuRoot extends TgpuRoot, WithBinding {
   ): void;
 
   /**
-   * Causes all commands enqueued by pipelines to be
-   * submitted to the GPU.
+   * Executes a batch of commands.
+   *
+   * The commands inside `callback` are recorded into a single command buffer when possible
+   * and then submitted to the device queue in one submission.
+   *
+   * The `callback` must be synchronous.
+   *
+   * While typically used for GPU computations, the batch may also contain other command types.
+   *
+   * @param callback A synchronous function containing the commands to batch.
    */
-  flush(): void;
+  batch<T>(
+    ...args: T extends Promise<unknown> ? [
+        'Batch operations must be synchronous. Async functions are not allowed. Use synchronous callbacks only.',
+      ]
+      : [callback: () => T]
+  ): void;
 }
diff --git a/packages/typegpu/src/prepareDispatch.ts b/packages/typegpu/src/prepareDispatch.ts
index 76989090a..e91762743 100644
--- a/packages/typegpu/src/prepareDispatch.ts
+++ b/packages/typegpu/src/prepareDispatch.ts
@@ -116,7 +116,6 @@ export function prepareDispatch<TArgs extends number[]>(
         workgroupCount.y,
         workgroupCount.z,
       );
-      root['~unstable'].flush();
     }) as DispatchForArgs<TArgs>;
 
   return new PreparedDispatch(createDispatch, pipeline);
diff --git a/packages/typegpu/tests/batch.test.ts b/packages/typegpu/tests/batch.test.ts
new file mode 100644
index 000000000..dccd2a012
--- /dev/null
+++ b/packages/typegpu/tests/batch.test.ts
@@ -0,0 +1,387 @@
+import { beforeEach, describe, expect, type TestAPI, vi } from 'vitest';
+import * as d from '../src/data/index.ts';
+import tgpu, {
+  prepareDispatch,
+  type TgpuComputePipeline,
+  type TgpuRenderPipeline,
+} from '../src/index.ts';
+import { $internal } from '../src/shared/symbols.ts';
+import { it } from './utils/extendedIt.ts';
+import type { RenderPass } from '../src/core/root/rootTypes.ts';
+
+describe('Batch', () => {
+  const entryFn = tgpu['~unstable'].computeFn({ workgroupSize: [7] })(() => {});
+  const vertexFn = tgpu['~unstable'].vertexFn({
+    out: { pos: d.builtin.position },
+  })(() => {
+    return { pos: d.vec4f() };
+  });
+  const fragmentFn = tgpu['~unstable'].fragmentFn({
+    out: d.vec4f,
+  })(() => d.vec4f());
+
+  let renderPipeline: TgpuRenderPipeline;
+  let computePipeline: TgpuComputePipeline;
+
+  type ExtendedTestContext<T> = T extends TestAPI<infer U> ? U : never;
+  beforeEach<ExtendedTestContext<typeof it>>(({ root }) => {
+    renderPipeline = root
+      .withVertex(vertexFn, {})
+      .withFragment(fragmentFn, { format: 'rgba8unorm' })
+      .createPipeline()
+      .withColorAttachment({
+        view: {} as unknown as GPUTextureView,
+        loadOp: 'clear',
+        storeOp: 'store',
+      });
+    computePipeline = root
+      .withCompute(entryFn)
+      .createPipeline();
+  });
+
+  it('flushes only once when used without performance callback', ({ root }) => {
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    root.batch(() => {
+      renderPipeline.draw(7);
+      computePipeline.dispatchWorkgroups(7);
+      renderPipeline.draw(7);
+      expect(flushMock).toBeCalledTimes(0);
+    });
+
+    expect(flushMock).toBeCalledTimes(1);
+  });
+
+  it('flushes only once when used with performance callbacks and callbacks are invoked', async ({ root }) => {
+    const querySet = root.createQuerySet('timestamp', 2);
+
+    let resolve: () => void;
+    let donePerformancing: Promise<void>;
+    donePerformancing = new Promise<void>((r) => {
+      resolve = r;
+    });
+    const callback = vi.fn(() => {
+      resolve();
+    });
+
+    const renderPipelineWithPerformance = renderPipeline
+      .withPerformanceCallback(callback);
+
+    const renderPipelineWithTimestampWrites = renderPipeline
+      .withTimestampWrites({
+        querySet,
+        beginningOfPassWriteIndex: 0,
+        endOfPassWriteIndex: 1,
+      });
+
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    // try several orderings of the three pipelines within a batch
+    root.batch(() => {
+      computePipeline.dispatchWorkgroups(7);
+      renderPipelineWithPerformance.draw(7);
+      renderPipelineWithTimestampWrites.draw(7);
+      expect(flushMock).toBeCalledTimes(0);
+      expect(callback).toBeCalledTimes(0);
+    });
+
+    // first from batch itself, second from querySet.read
+    expect(flushMock).toBeCalledTimes(2);
+    await donePerformancing;
+    expect(callback).toBeCalledTimes(1);
+
+    flushMock.mockClear();
+    callback.mockClear();
+    donePerformancing = new Promise<void>((r) => {
+      resolve = r;
+    });
+
+    root.batch(() => {
+      renderPipelineWithPerformance.draw(7);
+      computePipeline.dispatchWorkgroups(7);
+      renderPipelineWithTimestampWrites.draw(7);
+      expect(flushMock).toBeCalledTimes(0);
+      expect(callback).toBeCalledTimes(0);
+    });
+
+    // first from batch, second from querySet.read
+    expect(flushMock).toBeCalledTimes(2);
+    await donePerformancing;
+    expect(callback).toBeCalledTimes(1);
+
+    flushMock.mockClear();
+    callback.mockClear();
+    donePerformancing = new Promise<void>((r) => {
+      resolve = r;
+    });
+
+    root.batch(() => {
+      renderPipelineWithTimestampWrites.draw(7);
+      computePipeline.dispatchWorkgroups(7);
+      renderPipelineWithPerformance.draw(7);
+      expect(flushMock).toBeCalledTimes(0);
+      expect(callback).toBeCalledTimes(0);
+    });
+
+    // first from batch, second from querySet.read
+    expect(flushMock).toBeCalledTimes(2);
+    await donePerformancing;
+    expect(callback).toBeCalledTimes(1);
+  });
+
+  it('flushes properly with drawIndexed', async ({ root }) => {
+    const querySet = root.createQuerySet('timestamp', 2);
+    const indexBuffer = root.createBuffer(d.arrayOf(d.u16, 2)).$usage('index');
+
+    let resolve: () => void;
+    const donePerformancing = new Promise<void>((r) => {
+      resolve = r;
+    });
+    const callback = vi.fn(() => {
+      resolve();
+    });
+
+    const renderPipeline1 = renderPipeline.withIndexBuffer(indexBuffer);
+    const renderPipeline2 = renderPipeline
+      .withPerformanceCallback(callback)
+      .withIndexBuffer(indexBuffer);
+    const renderPipeline3 = renderPipeline
+      .withTimestampWrites({
+        querySet,
+        beginningOfPassWriteIndex: 0,
+        endOfPassWriteIndex: 1,
+      })
+      .withIndexBuffer(indexBuffer);
+
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    root.batch(() => {
+      renderPipeline1.drawIndexed(7);
+      renderPipeline2.drawIndexed(7);
+      renderPipeline3.drawIndexed(7);
+      expect(flushMock).toBeCalledTimes(0);
+      expect(callback).toBeCalledTimes(0);
+    });
+
+    // first from batch, second from querySet.read
+    expect(flushMock).toBeCalledTimes(2);
+    await donePerformancing;
+    expect(callback).toBeCalledTimes(1);
+  });
+
+  it('flushes properly with beginRenderPass', ({ root }) => {
+    const bindGroupLayout = tgpu.bindGroupLayout({});
+    const bindGroup = root.createBindGroup(bindGroupLayout, {});
+
+    const renderPassArgs: Parameters<typeof root['beginRenderPass']> = [
+      { colorAttachments: [] },
+      (pass: RenderPass) => {
+        pass.setPipeline(renderPipeline);
+        pass.setBindGroup(bindGroupLayout, bindGroup);
+        pass.draw(7);
+      },
+    ];
+
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    root['~unstable'].beginRenderPass(...renderPassArgs);
+    expect(flushMock).toBeCalledTimes(1);
+
+    root['~unstable'].beginRenderPass(...renderPassArgs);
+    expect(flushMock).toBeCalledTimes(2);
+
+    flushMock.mockClear();
+
+    root['~unstable'].batch(() => {
+      root['~unstable'].beginRenderPass(...renderPassArgs);
+      root['~unstable'].beginRenderPass(...renderPassArgs);
+      expect(flushMock).toBeCalledTimes(0);
+    });
+    expect(flushMock).toBeCalledTimes(1);
+  });
+
+  it('flushes immediately after read-write operation', ({ root }) => {
+    const wBuffer = root.createBuffer(d.arrayOf(d.u32, 7));
+    const rBuffer = root.createBuffer(d.u32, 7);
+
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    root.batch(() => {
+      wBuffer.write([1, 2, 3, 4, 5, 6, 7]);
+      expect(flushMock).toBeCalledTimes(1);
+      wBuffer.writePartial([{ idx: 6, value: 1882 }]);
+      expect(flushMock).toBeCalledTimes(2);
+      rBuffer.read();
+      expect(flushMock).toBeCalledTimes(3);
+    });
+    expect(flushMock).toBeCalledTimes(4);
+  });
+
+  it('handles nested batches with performance callbacks', async ({ root }) => {
+    const callback1 = () => {};
+    const callback2 = () => {};
+    const callback3 = () => {};
+
+    const renderPipelineWithPerformance1 = renderPipeline
+      .withPerformanceCallback(callback1);
+    const renderPipelineWithPerformance2 = renderPipeline
+      .withPerformanceCallback(callback2);
+    const renderPipelineWithPerformance3 = renderPipeline
+      .withPerformanceCallback(callback3);
+
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    root.batch(() => {
+      renderPipelineWithPerformance1.draw(7);
+      expect(root[$internal].batchState.performanceCallbacks.length).toBe(1);
+      root.batch(() => {
+        renderPipelineWithPerformance2.draw(7);
+        expect(root[$internal].batchState.performanceCallbacks.length).toBe(2);
+      });
+
+      // first one from batch, second one from querySet.read
+      expect(flushMock).toBeCalledTimes(2);
+      expect(root[$internal].batchState.performanceCallbacks.length).toBe(1);
+
+      renderPipelineWithPerformance3.draw(7);
+
+      expect(root[$internal].batchState.performanceCallbacks.length).toBe(2);
+    });
+
+    expect(flushMock).toBeCalledTimes(5);
+    expect(root[$internal].batchState.performanceCallbacks.length).toBe(0);
+  });
+
+  it('clears callback stack at the end of batch', async ({ root }) => {
+    let resolve1: () => void;
+    const donePerformancing1 = new Promise<void>((r) => {
+      resolve1 = r;
+    });
+    const callback1 = vi.fn(() => {
+      resolve1();
+    });
+    let resolve2: () => void;
+    const donePerformancing2 = new Promise<void>((r) => {
+      resolve2 = r;
+    });
+    const callback2 = vi.fn(() => {
+      resolve2();
+    });
+    let resolve3: () => void;
+    const donePerformancing3 = new Promise<void>((r) => {
+      resolve3 = r;
+    });
+    const callback3 = vi.fn(() => {
+      resolve3();
+    });
+    let resolve4: () => void;
+    const donePerformancing4 = new Promise<void>((r) => {
+      resolve4 = r;
+    });
+    const callback4 = vi.fn(() => {
+      resolve4();
+    });
+
+    const renderPipelineWithPerformance1 = renderPipeline
+      .withPerformanceCallback(callback1);
+    const renderPipelineWithPerformance2 = renderPipeline
+      .withPerformanceCallback(callback2);
+    const renderPipelineWithPerformance3 = renderPipeline
+      .withPerformanceCallback(callback3);
+    const renderPipelineWithPerformance4 = renderPipeline
+      .withPerformanceCallback(callback4);
+
+    root.batch(() => {
+      renderPipelineWithPerformance1.draw(7);
+      renderPipelineWithPerformance2.draw(7);
+    });
+
+    await Promise.all([
+      donePerformancing1,
+      donePerformancing2,
+    ]);
+
+    expect(callback1).toBeCalledTimes(1);
+    expect(callback2).toBeCalledTimes(1);
+    expect(callback3).toBeCalledTimes(0);
+    expect(callback4).toBeCalledTimes(0);
+
+    expect(root[$internal].batchState.performanceCallbacks.length).toBe(0);
+
+    root.batch(() => {
+      renderPipelineWithPerformance3.draw(7);
+      renderPipelineWithPerformance4.draw(7);
+    });
+
+    await Promise.all([
+      donePerformancing3,
+      donePerformancing4,
+    ]);
+
+    expect(callback1).toBeCalledTimes(1);
+    expect(callback2).toBeCalledTimes(1);
+    expect(callback3).toBeCalledTimes(1);
+    expect(callback4).toBeCalledTimes(1);
+    expect(root[$internal].batchState.performanceCallbacks.length).toBe(0);
+  });
+
+  it('handles prepareDispatch().dispatch', ({ root }) => {
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    prepareDispatch(root, () => {
+      'kernel';
+    }).dispatch();
+    expect(flushMock).toBeCalledTimes(1);
+
+    root['~unstable'].batch(() => {
+      prepareDispatch(root, () => {
+        'kernel';
+      }).dispatch();
+      // from write inside dispatch
+      expect(flushMock).toBeCalledTimes(2);
+    });
+    expect(flushMock).toBeCalledTimes(3);
+  });
+
+  it('flushes immediately after pipeline with console.log', ({ device }) => {
+    const root = tgpu.initFromDevice({
+      device,
+      unstable_logOptions: {
+        logCountLimit: 32,
+        logSizeLimit: 8,
+      },
+    });
+    const f = tgpu['~unstable'].computeFn({ workgroupSize: [1] })(() => {
+      console.log(d.u32(7));
+    });
+    const pipeline = root['~unstable'].withCompute(f).createPipeline();
+
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    pipeline.dispatchWorkgroups(1, 1, 1);
+    expect(flushMock).not.toBeCalledTimes(0);
+
+    flushMock.mockClear();
+
+    root['~unstable'].batch(() => {
+      pipeline.dispatchWorkgroups(1, 1, 1);
+      expect(flushMock).not.toBeCalledTimes(0);
+    });
+  });
+
+  it('restores auto-flush', ({ root }) => {
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
+    computePipeline.dispatchWorkgroups(7, 7, 7);
+    expect(flushMock).toBeCalledTimes(1);
+
+    root.batch(() => {
+      computePipeline.dispatchWorkgroups(7, 7, 7);
+      expect(flushMock).toBeCalledTimes(1);
+    });
+    expect(flushMock).toBeCalledTimes(2);
+
+    computePipeline.dispatchWorkgroups(7, 7, 7);
+    expect(flushMock).toBeCalledTimes(3);
+  });
+});
diff --git a/packages/typegpu/tests/buffer.test.ts b/packages/typegpu/tests/buffer.test.ts
index 497d5fb15..1fd810a76 100644
--- a/packages/typegpu/tests/buffer.test.ts
+++ b/packages/typegpu/tests/buffer.test.ts
@@ -1,5 +1,7 @@
+import { $internal } from '../src/shared/symbols.ts';
+import { it } from './utils/extendedIt.ts';
 import { attest } from '@ark/attest';
-import { describe, expect, expectTypeOf } from 'vitest';
+import { describe, expect, expectTypeOf, vi } from 'vitest';
 import * as d from '../src/data/index.ts';
 import type { ValidateBufferSchema, ValidUsagesFor } from '../src/index.ts';
 import { getName } from '../src/shared/meta.ts';
@@ -8,7 +10,6 @@ import type {
   IsValidUniformSchema,
 } from '../src/shared/repr.ts';
 import type { TypedArray } from '../src/shared/utilityTypes.ts';
-import { it } from './utils/extendedIt.ts';
 
 function toUint8Array(...arrays: Array<TypedArray>): Uint8Array {
   let totalByteLength = 0;
@@ -572,6 +573,43 @@ describe('TgpuBuffer', () => {
       ]
     >();
   });
+
+  it('should not flush command encoder inside write', ({ root }) => {
+    const buffer = root.createBuffer(d.u32, 7);
+
+    vi.spyOn(root[$internal], 'flush');
+    buffer.write(1929);
+    expect(root[$internal].flush).toBeCalledTimes(0);
+  });
+
+  it('should not flush command encoder inside writePartial', ({ root }) => {
+    const buffer = root.createBuffer(
+      d.arrayOf(d.struct({ foo: d.u32, bar: d.u32, baz: d.u32 }), 7),
+    );
+
+    vi.spyOn(root[$internal], 'flush');
+    buffer.writePartial([
+      { idx: 0, value: { foo: 7, bar: 7, baz: 7 } },
+      { idx: 3, value: { foo: 3, bar: 3, baz: 3 } },
+    ]);
+    expect(root[$internal].flush).toBeCalledTimes(0);
+  });
+
+  it('should not flush command encoder inside read', ({ root }) => {
+    const buffer = root.createBuffer(d.u32, 7);
+
+    vi.spyOn(root[$internal], 'flush');
+    buffer.read();
+    expect(root[$internal].flush).toBeCalledTimes(0);
+  });
+
+  it('should flush command encoder inside clear', ({ root }) => {
+    const buffer = root.createBuffer(d.u32, 7);
+
+    vi.spyOn(root[$internal], 'flush');
+    buffer.clear();
+    expect(root[$internal].flush).toBeCalledTimes(1);
+  });
 });
 
 describe('IsValidUniformSchema', () => {
diff --git a/packages/typegpu/tests/computePipeline.test.ts b/packages/typegpu/tests/computePipeline.test.ts
index 29234f77d..a48e94983 100644
--- a/packages/typegpu/tests/computePipeline.test.ts
+++ b/packages/typegpu/tests/computePipeline.test.ts
@@ -557,4 +557,57 @@ describe('TgpuComputePipeline', () => {
       }"
     `);
   });
+
+  describe('Flush', () => {
+    const entryFn = tgpu['~unstable'].computeFn({ workgroupSize: [1] })(
+      () => {},
+    );
+
+    it('flushes after dispatchWorkgroups', ({ root }) => {
+      const pipeline = root
+        .withCompute(entryFn)
+        .createPipeline();
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.dispatchWorkgroups(777);
+
+      expect(root[$internal].flush).toBeCalledTimes(1);
+    });
+
+    it('flushes after dispatchWorkgroups with performance callback', ({ root }) => {
+      const callback = vi.fn();
+
+      const pipeline = root
+        .withCompute(entryFn)
+        .createPipeline()
+        .withPerformanceCallback(callback);
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.dispatchWorkgroups(777);
+
+      // first from pipeline itself, second from querySet.read
+      expect(root[$internal].flush).toBeCalledTimes(2);
+    });
+
+    it('flushes after dispatchWorkgroups with timestamp writes', ({ root }) => {
+      const querySet = root.createQuerySet('timestamp', 2);
+
+      const pipeline = root
+        .withCompute(entryFn)
+        .createPipeline()
+        .withTimestampWrites({
+          querySet,
+          beginningOfPassWriteIndex: 0,
+          endOfPassWriteIndex: 1,
+        });
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.dispatchWorkgroups(777);
+      // no performance callback here, so the pipeline's own flush is the only one
+      expect(root[$internal].flush).toBeCalledTimes(1);
+    });
+  });
 });
diff --git a/packages/typegpu/tests/querySet.test.ts b/packages/typegpu/tests/querySet.test.ts
index 386147d8a..4dc112b4b 100644
--- a/packages/typegpu/tests/querySet.test.ts
+++ b/packages/typegpu/tests/querySet.test.ts
@@ -64,15 +64,17 @@ describe('TgpuQuerySet', () => {
       resolveBuffer,
       0,
     );
-    expect(commandEncoder.finish).toHaveBeenCalled();
-    expect(device.queue.submit).toHaveBeenCalled();
   });
 
-  it('should read from query set after resolution', async ({ root, device, commandEncoder }) => {
+  it('should read from query set after resolution', async ({ root, commandEncoder }) => {
+    const flushMock = vi.spyOn(root[$internal], 'flush');
+
     const querySet = root.createQuerySet('timestamp', 2);
 
     querySet.resolve();
 
+    expect(flushMock).toBeCalledTimes(0);
+
     const testData = new BigUint64Array([123n, 456n]);
     const readBuffer = querySet[$internal].readBuffer;
     readBuffer.getMappedRange = vi.fn(() => testData.buffer);
@@ -86,8 +88,7 @@ describe('TgpuQuerySet', () => {
       0,
       2 * BigUint64Array.BYTES_PER_ELEMENT,
     );
-    expect(device.queue.submit).toHaveBeenCalled();
-    expect(device.queue.onSubmittedWorkDone).toHaveBeenCalled();
+    expect(flushMock).toBeCalledTimes(1);
     expect(readBuffer.mapAsync).toHaveBeenCalledWith(GPUMapMode.READ);
     expect(readBuffer.getMappedRange).toHaveBeenCalled();
     expect(readBuffer.unmap).toHaveBeenCalled();
@@ -244,9 +245,7 @@ describe('TgpuQuerySet', () => {
     querySet.resolve();
     querySet.resolve();
 
-    expect(device.mock.createCommandEncoder).toHaveBeenCalledTimes(2);
+    expect(device.mock.createCommandEncoder).toHaveBeenCalledTimes(1);
     expect(commandEncoder.resolveQuerySet).toHaveBeenCalledTimes(2);
-    expect(commandEncoder.finish).toHaveBeenCalledTimes(2);
-    expect(device.queue.submit).toHaveBeenCalledTimes(2);
   });
 });
diff --git a/packages/typegpu/tests/renderPipeline.test.ts b/packages/typegpu/tests/renderPipeline.test.ts
index 2fbb77445..a94cd3405 100644
--- a/packages/typegpu/tests/renderPipeline.test.ts
+++ b/packages/typegpu/tests/renderPipeline.test.ts
@@ -750,7 +750,7 @@ describe('TgpuRenderPipeline', () => {
     );
   });
 
-  it('should onlly allow for drawIndexed with assigned index buffer', ({ root }) => {
+  it('should only allow for drawIndexed with assigned index buffer', ({ root }) => {
     const vertexFn = tgpu['~unstable']
       .vertexFn({
         out: { pos: d.builtin.position },
@@ -811,7 +811,10 @@ describe('TgpuRenderPipeline', () => {
     const querySet = root.createQuerySet('timestamp', 2);
     const indexBuffer = root.createBuffer(d.arrayOf(d.u16, 2)).$usage('index');
 
-    const beginRenderPassSpy = vi.spyOn(root.commandEncoder, 'beginRenderPass');
+    const beginRenderPassSpy = vi.spyOn(
+      root[$internal].commandEncoder,
+      'beginRenderPass',
+    );
 
     const pipeline = root
       .withVertex(vertexFn, {})
@@ -882,8 +885,14 @@ describe('TgpuRenderPipeline', () => {
 
     const querySet = root.createQuerySet('timestamp', 2);
     const indexBuffer = root.createBuffer(d.arrayOf(d.u16, 2)).$usage('index');
-    const beginRenderPassSpy = vi.spyOn(root.commandEncoder, 'beginRenderPass');
-    const resolveQuerySetSpy = vi.spyOn(root.commandEncoder, 'resolveQuerySet');
+    const beginRenderPassSpy = vi.spyOn(
+      root[$internal].commandEncoder,
+      'beginRenderPass',
+    );
+    const resolveQuerySetSpy = vi.spyOn(
+      root[$internal].commandEncoder,
+      'resolveQuerySet',
+    );
 
     const callback = vi.fn();
 
@@ -926,21 +935,23 @@ describe('TgpuRenderPipeline', () => {
       count: 2,
     });
 
-    expect(root.commandEncoder.beginRenderPass).toHaveBeenCalledWith({
-      colorAttachments: [
-        {
-          loadOp: 'clear',
-          storeOp: 'store',
-          view: expect.any(Object),
+    expect(root[$internal].commandEncoder.beginRenderPass).toHaveBeenCalledWith(
+      {
+        colorAttachments: [
+          {
+            loadOp: 'clear',
+            storeOp: 'store',
+            view: expect.any(Object),
+          },
+        ],
+        label: 'pipeline',
+        timestampWrites: {
+          beginningOfPassWriteIndex: 0,
+          endOfPassWriteIndex: 1,
+          querySet: querySet.querySet,
         },
-      ],
-      label: 'pipeline',
-      timestampWrites: {
-        beginningOfPassWriteIndex: 0,
-        endOfPassWriteIndex: 1,
-        querySet: querySet.querySet,
       },
-    });
+    );
 
     expect(resolveQuerySetSpy).toHaveBeenCalledWith(
       querySet.querySet,
@@ -966,6 +977,175 @@ describe('TgpuRenderPipeline', () => {
       },
     });
   });
+
+  // Each test below spies on root[$internal].flush and counts how many times a
+  // single draw/drawIndexed call invokes it. Both shader bodies are empty strings.
+  describe('Flush', () => {
+    const vertexFn = tgpu['~unstable'].vertexFn({
+      out: { pos: d.builtin.position },
+    })('');
+
+    const fragmentFn = tgpu['~unstable'].fragmentFn({
+      out: { color: d.vec4f },
+    })('');
+
+    it('flushes after draw', ({ root }) => {
+      const pipeline = root
+        .withVertex(vertexFn, {})
+        .withFragment(fragmentFn, { color: { format: 'rgba8unorm' } })
+        .createPipeline()
+        .withColorAttachment({
+          color: {
+            view: {} as GPUTextureView,
+            loadOp: 'clear',
+            storeOp: 'store',
+          },
+        });
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.draw(3);
+
+      expect(root[$internal].flush).toHaveBeenCalledTimes(1);
+    });
+
+    it('flushes after draw with performance callback', ({ root }) => {
+      const callback = vi.fn();
+
+      const pipeline = root
+        .withVertex(vertexFn, {})
+        .withFragment(fragmentFn, { color: { format: 'rgba8unorm' } })
+        .createPipeline()
+        .withColorAttachment({
+          color: {
+            view: {} as GPUTextureView,
+            loadOp: 'clear',
+            storeOp: 'store',
+          },
+        })
+        .withPerformanceCallback(callback);
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.draw(3);
+
+      // first from pipeline itself, second from querySet.read
+      expect(root[$internal].flush).toHaveBeenCalledTimes(2);
+    });
+
+    it('flushes after draw with timestamp writes', ({ root }) => {
+      const querySet = root.createQuerySet('timestamp', 2);
+
+      const pipeline = root
+        .withVertex(vertexFn, {})
+        .withFragment(fragmentFn, { color: { format: 'rgba8unorm' } })
+        .createPipeline()
+        .withColorAttachment({
+          color: {
+            view: {} as GPUTextureView,
+            loadOp: 'clear',
+            storeOp: 'store',
+          },
+        })
+        .withTimestampWrites({
+          querySet,
+          beginningOfPassWriteIndex: 0,
+          endOfPassWriteIndex: 1,
+        });
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.draw(3);
+
+      // a single flush is expected here, unlike the performance-callback case
+      expect(root[$internal].flush).toHaveBeenCalledTimes(1);
+    });
+
+    it('flushes after drawIndexed', ({ root }) => {
+      const indexBuffer = root
+        .createBuffer(d.arrayOf(d.u32, 2))
+        .$usage('index');
+
+      const pipeline = root
+        .withVertex(vertexFn, {})
+        .withFragment(fragmentFn, { color: { format: 'rgba8unorm' } })
+        .createPipeline()
+        .withColorAttachment({
+          color: {
+            view: {} as GPUTextureView,
+            loadOp: 'clear',
+            storeOp: 'store',
+          },
+        })
+        .withIndexBuffer(indexBuffer);
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.drawIndexed(3);
+
+      expect(root[$internal].flush).toHaveBeenCalledTimes(1);
+    });
+
+    it('flushes after drawIndexed with performance callback', ({ root }) => {
+      const indexBuffer = root
+        .createBuffer(d.arrayOf(d.u32, 2))
+        .$usage('index');
+      const callback = vi.fn();
+
+      const pipeline = root
+        .withVertex(vertexFn, {})
+        .withFragment(fragmentFn, { color: { format: 'rgba8unorm' } })
+        .createPipeline()
+        .withColorAttachment({
+          color: {
+            view: {} as GPUTextureView,
+            loadOp: 'clear',
+            storeOp: 'store',
+          },
+        })
+        .withIndexBuffer(indexBuffer)
+        .withPerformanceCallback(callback);
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.drawIndexed(3);
+
+      // first from pipeline itself, second from querySet.read
+      expect(root[$internal].flush).toHaveBeenCalledTimes(2);
+    });
+
+    it('flushes after drawIndexed with timestamp writes', ({ root }) => {
+      const indexBuffer = root
+        .createBuffer(d.arrayOf(d.u32, 2))
+        .$usage('index');
+      const querySet = root.createQuerySet('timestamp', 2);
+
+      const pipeline = root
+        .withVertex(vertexFn, {})
+        .withFragment(fragmentFn, { color: { format: 'rgba8unorm' } })
+        .createPipeline()
+        .withColorAttachment({
+          color: {
+            view: {} as GPUTextureView,
+            loadOp: 'clear',
+            storeOp: 'store',
+          },
+        })
+        .withIndexBuffer(indexBuffer)
+        .withTimestampWrites({
+          querySet,
+          beginningOfPassWriteIndex: 0,
+          endOfPassWriteIndex: 1,
+        });
+
+      vi.spyOn(root[$internal], 'flush');
+
+      pipeline.drawIndexed(3);
+
+      // a single flush is expected here, unlike the performance-callback case
+      expect(root[$internal].flush).toHaveBeenCalledTimes(1);
+    });
+  });
 });
 
 describe('matchUpVaryingLocations', () => {
diff --git a/packages/typegpu/tests/root.test.ts b/packages/typegpu/tests/root.test.ts
index d13953ad3..4dd4f26f4 100644
--- a/packages/typegpu/tests/root.test.ts
+++ b/packages/typegpu/tests/root.test.ts
@@ -3,6 +3,7 @@ import * as d from '../src/data/index.ts';
 import { Void } from '../src/data/wgslTypes.ts';
 import tgpu from '../src/index.ts';
 import { it } from './utils/extendedIt.ts';
+import { $internal } from '../src/shared/symbols.ts';
 
 describe('TgpuRoot', () => {
   describe('.createBuffer', () => {
@@ -291,6 +292,94 @@ describe('TgpuRoot', () => {
       expect(renderPassMock.setBindGroup).toBeCalledTimes(1);
       expect(renderPassMock.setBindGroup).toBeCalledWith(0, root.unwrap(group));
     });
+
+    // beginRenderPass is expected to call root[$internal].flush exactly once.
+    it('is flushed automatically', ({ root, commandEncoder }) => {
+      const group = root.createBindGroup(layout, {
+        foo: root.createBuffer(d.f32).$usage('uniform'),
+      });
+
+      const pipeline = root
+        .withVertex(mainVertexUsing, {})
+        .withFragment(mainFragment, {})
+        .createPipeline()
+        .with(layout, group);
+
+      vi.spyOn(root[$internal], 'flush');
+
+      root.beginRenderPass(
+        {
+          colorAttachments: [],
+        },
+        (pass) => {
+          pass.setPipeline(pipeline);
+          pass.draw(1);
+        },
+      );
+
+      expect(root[$internal].flush).toBeCalledTimes(1);
+    });
+  });
+
+  // Reading root[$internal].commandEncoder should create an encoder on first access
+  // and hand back the same one on later accesses until a flush happens.
+  describe('commandEncoder', () => {
+    it('is not null', ({ root }) => {
+      const commandEncoder = root[$internal].commandEncoder;
+      // toBeDefined() alone would let null through
+      expect(commandEncoder).toBeDefined();
+      expect(commandEncoder).not.toBeNull();
+    });
+
+    it('creates new commandEncoder when called for the first time', ({ device }) => {
+      const root = tgpu.initFromDevice({
+        device: device as unknown as GPUDevice,
+      });
+      // reading the property is the action under test
+      root[$internal].commandEncoder;
+      expect(device.createCommandEncoder).toBeCalled();
+    });
+
+    it('does not create a new commandEncoder when called more than once without flush', ({ device }) => {
+      const root = tgpu.initFromDevice({
+        device: device as unknown as GPUDevice,
+      });
+      const commandEncoder1 = root[$internal].commandEncoder;
+      const commandEncoder2 = root[$internal].commandEncoder;
+      expect(device.createCommandEncoder).toBeCalledTimes(1);
+      expect(commandEncoder1).toBe(commandEncoder2);
+    });
+  });
+
+  // flush should submit the finished encoder to the device queue and discard it,
+  // so the next commandEncoder access creates a fresh one.
+  describe('flush', () => {
+    it('should not throw when called without initializing a commandEncoder', ({ device }) => {
+      const root = tgpu.initFromDevice({
+        device: device as unknown as GPUDevice,
+      });
+      expect(() => root[$internal].flush()).not.toThrow();
+    });
+
+    it('submits commandEncoder', ({ device }) => {
+      const root = tgpu.initFromDevice({
+        device: device as unknown as GPUDevice,
+      });
+      const commandEncoder = root[$internal].commandEncoder;
+      root[$internal].flush();
+      // NOTE(review): assumes the mocked finish() returns an equal value on every call — confirm
+      expect(device.queue.submit).toBeCalledWith([commandEncoder.finish()]);
+    });
+
+    it('clears commandEncoder', ({ device }) => {
+      const root = tgpu.initFromDevice({
+        device: device as unknown as GPUDevice,
+      });
+      root[$internal].commandEncoder;
+      root[$internal].flush();
+      root[$internal].commandEncoder;
+      expect(device.createCommandEncoder).toBeCalledTimes(2);
+    });
   });
 
   // TODO: Adapt the tests to the new API