39 commits
23aa1c7
Initial investigation
cieplypolar Sep 9, 2025
760398f
Tests for root internals
cieplypolar Sep 10, 2025
2754105
Merge branch 'main' into feat/byeflush
cieplypolar Sep 10, 2025
95a5a79
More tests
cieplypolar Sep 10, 2025
805e863
Compute pipeline flushes
cieplypolar Sep 10, 2025
ce2ccea
Fix examples
cieplypolar Sep 10, 2025
bd2ccae
First steps with batch()
cieplypolar Sep 10, 2025
b558d63
Tests will be moved to the batch suite
cieplypolar Sep 11, 2025
b39c1b3
Batch moved to root
cieplypolar Sep 11, 2025
85f5ff0
Batch implemented with tests
cieplypolar Sep 11, 2025
d532d48
Batched examples
cieplypolar Sep 11, 2025
d3e5837
Docs
cieplypolar Sep 11, 2025
a2bd1c6
Docs 2
cieplypolar Sep 11, 2025
104a757
Merge branch 'main' into feat/byeflush
cieplypolar Sep 11, 2025
7cab850
Review fixes
cieplypolar Sep 12, 2025
3f5c807
Missing tests for read, write
cieplypolar Sep 12, 2025
7fa7cfa
Performance callback does not flush
cieplypolar Sep 15, 2025
4ba5660
Updated docs and tests
cieplypolar Sep 15, 2025
39045ba
Merge branch 'main' into feat/byeflush
cieplypolar Sep 15, 2025
c3404e0
Review changes 2
cieplypolar Sep 18, 2025
de727ca
Merge branch 'main' into feat/byeflush
cieplypolar Sep 18, 2025
492e8b8
Merge branch 'main' into feat/byeflush
cieplypolar Sep 18, 2025
59db68b
Fix example error
cieplypolar Sep 18, 2025
0adde1f
Merge branch 'main' into feat/byeflush
cieplypolar Sep 18, 2025
cad8bea
Review changes 3
cieplypolar Sep 22, 2025
46c4b87
Merge branch 'main' into feat/byeflush
cieplypolar Sep 22, 2025
cbd56f0
Add questions
iwoplaza Sep 26, 2025
494a835
Fix query set queueing
cieplypolar Sep 26, 2025
e2ed09e
querySet fix
cieplypolar Sep 29, 2025
46933fa
Merge branch 'main' into feat/byeflush
cieplypolar Sep 29, 2025
248bdd6
Merge branch 'main' into feat/byeflush
cieplypolar Sep 29, 2025
2f8db16
fix broken merge
cieplypolar Sep 29, 2025
3497031
nested batching
cieplypolar Sep 30, 2025
ce3f458
Merge branch 'main' into feat/byeflush
cieplypolar Sep 30, 2025
fbff238
more descriptive error
cieplypolar Sep 30, 2025
cdb5347
Update docs
cieplypolar Oct 2, 2025
5fc710f
Merge branch 'main' into feat/byeflush
cieplypolar Oct 2, 2025
f5e7f3c
Fix broken merge
cieplypolar Oct 2, 2025
c106ba9
Merge branch 'main' into feat/byeflush
cieplypolar Oct 7, 2025
12 changes: 5 additions & 7 deletions apps/typegpu-docs/src/content/docs/fundamentals/pipelines.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -325,9 +325,7 @@ It accepts the number of vertices and optionally the instance count, first verte
After calling the method, the shader is set for execution immediately.

Compute pipelines are executed using the `dispatchWorkgroups` method, which accepts the number of workgroups in each dimension.
Unlike render pipelines, after running this method, the execution is not submitted to the GPU immediately.
In order to do so, `root['~unstable'].flush()` needs to be run.
However, that is usually not necessary, as it is done automatically when trying to read the result of computation.
As with render pipelines, after calling the method, the shader is set for execution immediately.

### Drawing with `drawIndexed`

Expand Down Expand Up @@ -376,14 +374,14 @@ const mainFragment = tgpu['~unstable'].fragmentFn({
const indexBuffer = root
.createBuffer(d.arrayOf(d.u16, 6), [0, 2, 1, 0, 3, 2])
.$usage('index');

const pipeline = root['~unstable']
.withVertex(vertex, { color: vertexLayout.attrib })
.withFragment(mainFragment, { format: presentationFormat })
.createPipeline()
.withIndexBuffer(indexBuffer);

pipeline
.with(vertexLayout, colorBuffer)
.drawIndexed(6);
```
Expand All @@ -394,6 +392,8 @@ The higher-level API has several limitations, therefore another way of executing

`root['~unstable'].beginRenderPass` is a method that mirrors the WebGPU API, but enriches it with direct TypeGPU resource support.

The render pass is submitted automatically to the device queue.

```ts
root['~unstable'].beginRenderPass(
{
Expand All @@ -407,8 +407,6 @@ root['~unstable'].beginRenderPass(
pass.draw(3);
},
);

root['~unstable'].flush();
```

It is also possible to access the underlying WebGPU resources for the TypeGPU pipelines, by calling `root.unwrap(pipeline)`.
Expand Down
137 changes: 136 additions & 1 deletion apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx
Expand Up @@ -117,6 +117,141 @@ The default workgroup sizes are:
The callback is not called if the global invocation id of a thread would exceed the size in any dimension.
:::

## *batch*
By default, TypeGPU pipelines and render passes are submitted to the GPU immediately.
If you want to give the GPU an opportunity to better utilize its resources,
you can use the `batch` function.

The `batch` function allows you to submit multiple pipelines and render passes to the GPU in a single call.
Under the hood, it creates a `GPUCommandEncoder`,
records the commands issued by the provided callback function,
and submits the resulting `GPUCommandBuffer` to the device.
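Conceptually, the mechanism can be modeled with a small mock (all names below are hypothetical and for illustration only, not the actual TypeGPU implementation):

```typescript
// Mock model of batching: outside `batch`, each command is submitted
// immediately in its own submission; inside, commands share one recording
// and are submitted together at the end.
type Submission = string[];

function makeMockRoot() {
  const submissions: Submission[] = [];
  let recording: string[] | null = null;

  const encode = (cmd: string) => {
    if (recording !== null) {
      recording.push(cmd); // inside a batch: just record
    } else {
      submissions.push([cmd]); // outside: submit immediately
    }
  };

  return {
    submissions,
    draw: (vertices: number) => encode(`draw(${vertices})`),
    dispatchWorkgroups: (x: number) => encode(`dispatch(${x})`),
    batch(cb: () => void) {
      recording = []; // one shared "command encoder"
      cb();
      submissions.push(recording); // single submit at the end
      recording = null;
    },
  };
}

const root = makeMockRoot();
root.draw(3); // submitted on its own
root.batch(() => {
  root.dispatchWorkgroups(7);
  root.draw(777); // recorded, not yet submitted
});
// two submissions total: ['draw(3)'] and ['dispatch(7)', 'draw(777)']
```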

:::caution
Read–write operations always flush the command encoder inside the batch environment (flushing means finalizing the command encoder and submitting the resulting command buffer to the GPU). Outside a batch they don't need to, since everything is already flushed. The table below shows when a flush occurs (i.e., when a new command encoder is created). Keep this in mind when using `batch`.
:::

| Invocation | Inside batch env | Outside batch env |
|-------------------------------------------------|---------------------|--------------------|
| `pipeline.draw` | No Flush ❌ | Flush ✅ |
| `pipeline.drawIndexed` | No Flush ❌ | Flush ✅ |
| `pipeline.dispatchWorkgroups` | No Flush ❌ | Flush ✅ |
| `pipeline.withPerformanceCallback` | No Flush ❌ / ⚠️ ⬇️ | Flush ✅ |
| `pipeline.withTimestampWrites` | No Flush ❌ | Flush ✅ |
| `beginRenderPass` | No Flush ❌ | Flush ✅ |
| `buffer.write` | Flush ✅ | No Flush ❌ |
| `buffer.writePartial` | Flush ✅ | No Flush ❌ |
| `buffer.read` | Flush ✅ | No Flush ❌ |
| `querySet.resolve` | No Flush ❌ | No Flush ❌ |
| `querySet.read` | Flush ✅ | Flush ✅ |
| `pipeline containing console.log` | Flush ✅ | Flush ✅ |
| `prepareDispatch().dispatch`                     | No Flush ❌         | Flush ✅           |
| `nested batch` | Flush ✅ | N/A |
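The rules above can be encoded as a small lookup, which is handy for reasoning about when a new command encoder is created (a toy model mirroring the table, not the real TypeGPU logic):

```typescript
// Toy encoding of the flush table above; illustration only.
type Op =
  | 'pipeline.draw'
  | 'pipeline.dispatchWorkgroups'
  | 'beginRenderPass'
  | 'buffer.write'
  | 'buffer.read'
  | 'querySet.resolve'
  | 'querySet.read';

function flushes(op: Op, insideBatch: boolean): boolean {
  switch (op) {
    // Pipeline executions flush only when run outside a batch.
    case 'pipeline.draw':
    case 'pipeline.dispatchWorkgroups':
    case 'beginRenderPass':
      return !insideBatch;
    // Read–write operations flush only inside a batch.
    case 'buffer.write':
    case 'buffer.read':
      return insideBatch;
    // `querySet.resolve` never flushes by itself...
    case 'querySet.resolve':
      return false;
    // ...while `querySet.read` always does.
    case 'querySet.read':
      return true;
  }
}
```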


:::caution
When you call a pipeline with a performance callback, the callback is invoked at the end of the batch. The timestamps themselves are not affected by the batching. They are still written at the beginning and/or end of the associated pipeline/render pass.
:::

:::caution
`querySet.resolve` itself never flushes.
- If you need to read the results on the CPU, `querySet.read` will handle the flush.
- If you use the results on the GPU, the next flushing operation will submit the existing `commandEncoder`, which already contains the `querySet.resolve` command.
This works because a new `commandEncoder` is only created when necessary.
:::

### Example
```ts twoslash
import tgpu from 'typegpu';
import * as d from 'typegpu/data';

const entryFn = tgpu['~unstable'].computeFn({ workgroupSize: [7] })(() => {});
const vertexFn = tgpu['~unstable'].vertexFn({
out: { pos: d.builtin.position },
})(() => {
return { pos: d.vec4f() };
});
const fragmentFn = tgpu['~unstable'].fragmentFn({
out: d.vec4f,
})(() => d.vec4f());

const root = await tgpu.init();

const renderPipeline = root['~unstable']
.withVertex(vertexFn, {})
.withFragment(fragmentFn, { format: 'rgba8unorm' })
.createPipeline();

const computePipeline = root['~unstable']
.withCompute(entryFn)
.createPipeline();

const buffer = root.createBuffer(d.arrayOf(d.f32, 1024));

// ---cut---
const render = () => {
computePipeline.dispatchWorkgroups(7, 7, 7);
renderPipeline.draw(777);
// more operations...

buffer.write(Array.from({ length: 1024 }, () => Math.random()));
  // the write forces a flush; a new command encoder is created afterwards
};

root['~unstable'].batch(render);
```

:::note
The batch callback must be synchronous.
If asynchronous operations are required, split the work into multiple
separate batches and perform the asynchronous steps between them.
:::
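The pattern can be sketched with a mocked `batch` (hypothetical names, for illustration only): each call records synchronously and submits once, and any asynchronous work happens between the calls, never inside a callback.

```typescript
// Mocked `batch`: records synchronously, submits once per call.
const submissions: string[][] = [];
let recording: string[] | null = null;

const record = (cmd: string) => {
  if (recording) recording.push(cmd);
  else submissions.push([cmd]);
};

const batch = (cb: () => void) => {
  recording = [];
  cb(); // must complete synchronously
  submissions.push(recording);
  recording = null;
};

// First batch: queue the GPU work.
batch(() => record('dispatchWorkgroups(64)'));
// In real code, an `await buffer.read()` (or similar) would go here,
// between the two batches.
batch(() => record('draw(3)'));
// two separate submissions, one per batch
```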

### Nested batches
Nested batches flush the existing command encoder and create a new one. Performance callbacks registered inside a nested batch are invoked at its end. For example:


```ts twoslash
import tgpu from 'typegpu';
import * as d from 'typegpu/data';

const vertexFn = tgpu['~unstable'].vertexFn({
out: { pos: d.builtin.position },
})(() => {
return { pos: d.vec4f() };
});
const fragmentFn = tgpu['~unstable'].fragmentFn({
out: d.vec4f,
})(() => d.vec4f());

const root = await tgpu.init();

const callback = () => {};

const pipeline = root['~unstable']
.withVertex(vertexFn, {})
.withFragment(fragmentFn, { format: 'rgba8unorm' })
.createPipeline();

const renderPipelineWithPerformanceCallback1 = pipeline.withPerformanceCallback(
callback,
);
const renderPipelineWithPerformanceCallback2 = pipeline.withPerformanceCallback(
callback,
);

// ---cut---
root['~unstable'].batch(() => {
renderPipelineWithPerformanceCallback1.draw(1882);
root['~unstable'].batch(() => {
renderPipelineWithPerformanceCallback2.draw(1882);
});
// flush of the command encoder occurs, then callback2 is invoked
});
// flush of the (empty) command encoder occurs, then callback1 is invoked
```


## *console.log*

Yes, you read that correctly, TypeGPU implements logging to the console on the GPU!
Expand All @@ -135,7 +270,7 @@ const compute = prepareDispatch(root, () => {
console.log('Call number', callCountMutable.$);
});

compute.dispatch();
compute.dispatch();
compute.dispatch();

// Eventually...
Expand Down
Expand Up @@ -58,6 +58,4 @@ onFrame(() => {
],
vertexCount: 3,
});

root.flush();
});
Expand Up @@ -77,6 +77,4 @@ onFrame(() => {
],
vertexCount: 3,
});

runtime.flush();
});
Expand Up @@ -106,6 +106,4 @@ onFrame(() => {
],
vertexCount: 3,
});

runtime.flush();
});
Expand Up @@ -115,6 +115,4 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
Expand Up @@ -124,6 +124,4 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
Expand Up @@ -149,6 +149,4 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
Expand Up @@ -159,6 +159,4 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
Expand Up @@ -172,6 +172,4 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
Expand Up @@ -181,6 +181,4 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
Expand Up @@ -239,8 +239,6 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});

const options = {
Expand Down
Expand Up @@ -172,31 +172,6 @@ function frame() {
requestAnimationFrame(frame);
}

frame();
```
<Aside type="danger" title="The code above draws nothing!">
The `renderPipeline.execute(...)` method encodes all the necessary commands, but they will not be queued until we either read from a buffer or call `root.flush()`.
</Aside>

```diff lang=ts
function frame() {
renderPipeline.execute({
colorAttachments: [
{
view: context.getCurrentTexture().createView(),
clearValue: [0, 0, 0, 0],
loadOp: 'clear',
storeOp: 'store',
},
],
vertexCount: 3,
});

+ root.flush();

requestAnimationFrame(frame);
}

frame();
```

Expand Down Expand Up @@ -503,8 +478,6 @@ Let's explore both options.
vertexCount: 3,
+ instanceCount: triangleAmount,
});

runtime.flush();
});
```

Expand Down Expand Up @@ -562,8 +535,6 @@ Let's explore both options.
vertexCount: 3,
+ instanceCount: triangleAmount,
});

runtime.flush();
});
```

Expand Down Expand Up @@ -610,8 +581,6 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
```

Expand Down Expand Up @@ -812,8 +781,6 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
```

Expand Down Expand Up @@ -946,8 +913,6 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
```

Expand Down Expand Up @@ -1127,8 +1092,6 @@ onFrame(() => {
vertexCount: 3,
instanceCount: triangleAmount,
});

runtime.flush();
});
```

Expand Down Expand Up @@ -1500,4 +1463,3 @@ import step9webgpu from 'code/step9-webgpu.ts?raw';

Congratulations! You've successfully implemented the boids flocking algorithm in WebGPU using TypeGPU.
Along the way, you learned about creating and using a TypeGPU runtime, writing shader code, managing buffers, creating pipelines, and using slots. For more information, refer to the TypeGPU documentation. Thank you for following along and happy coding!

Expand Up @@ -203,7 +203,6 @@ function render() {
loadOp: 'clear',
storeOp: 'store',
}).draw(3);
root['~unstable'].flush();
}
render();

Expand Down