From 8c84bb29659bdec2a842d0c066d3952d4f5c6791 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 15:00:04 +0200 Subject: [PATCH 01/12] fix(examples): batch performance-overlay sprites via a Container The overlay rendered 1600 sprites with one context.render() per sprite, emitting one draw call each. Adding them to a Container and rendering it once batches them into a single draw call (7.8ms -> 1.8ms on the spike). --- examples/debug-layer/performance-overlay.js | 12 +++++++++--- examples/debug-layer/performance-overlay.ts | 11 +++++++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/examples/debug-layer/performance-overlay.js b/examples/debug-layer/performance-overlay.js index 275375b1..7534cc11 100644 --- a/examples/debug-layer/performance-overlay.js +++ b/examples/debug-layer/performance-overlay.js @@ -1,5 +1,5 @@ // Auto-generated from performance-overlay.ts — edit the .ts source, not this file. -import { Application, Color, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs'; +import { Application, Color, Container, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs'; import { DebugOverlay } from '@codexo/exojs/debug'; const app = new Application({ canvas: { @@ -17,14 +17,21 @@ const debug = new DebugOverlay(app); debug.layers.performance.visible = true; class PerformanceOverlayScene extends Scene { sprites; + layer; async load(loader) { await loader.load(Texture, { bunny: 'image/ship-a.png' }); } init(loader) { const { width, height } = this.app.canvas; + // All sprites share one texture, so adding them to a single container and + // rendering it once lets the renderer batch them into a single draw call. + // Rendering each sprite with its own `context.render(sprite)` call would + // instead emit one draw call per sprite and tank the frame rate. 
+ this.layer = new Container(); this.sprites = Array.from({ length: 1600 }, () => { const sprite = new Sprite(loader.get(Texture, 'bunny')).setAnchor(0.5).setScale(0.25); sprite.setPosition(Math.random() * width, Math.random() * height); + this.layer.addChild(sprite); return { sprite, vx: (Math.random() - 0.5) * 120, @@ -47,8 +54,7 @@ class PerformanceOverlayScene extends Scene { } draw(context) { context.backend.clear(); - for (const { sprite } of this.sprites) - context.render(sprite); + context.render(this.layer); } } app.start(new PerformanceOverlayScene()); diff --git a/examples/debug-layer/performance-overlay.ts b/examples/debug-layer/performance-overlay.ts index 7ff4b2b8..b5b1d415 100644 --- a/examples/debug-layer/performance-overlay.ts +++ b/examples/debug-layer/performance-overlay.ts @@ -1,4 +1,4 @@ -import { Application, Color, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs'; +import { Application, Color, Container, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs'; import { DebugOverlay } from '@codexo/exojs/debug'; const app = new Application({ @@ -19,6 +19,7 @@ debug.layers.performance.visible = true; class PerformanceOverlayScene extends Scene { private sprites!: { sprite: Sprite; vx: number; vy: number }[]; + private layer!: Container; override async load(loader): Promise<void> { await loader.load(Texture, { bunny: 'image/ship-a.png' }); @@ -27,9 +28,15 @@ class PerformanceOverlayScene extends Scene { override init(loader): void { const { width, height } = this.app.canvas; + // All sprites share one texture, so adding them to a single container and + // rendering it once lets the renderer batch them into a single draw call. + // Rendering each sprite with its own `context.render(sprite)` call would + // instead emit one draw call per sprite and tank the frame rate. 
+ this.layer = new Container(); this.sprites = Array.from({ length: 1600 }, () => { const sprite = new Sprite(loader.get(Texture, 'bunny')).setAnchor(0.5).setScale(0.25); sprite.setPosition(Math.random() * width, Math.random() * height); + this.layer.addChild(sprite); return { sprite, vx: (Math.random() - 0.5) * 120, @@ -53,7 +60,7 @@ class PerformanceOverlayScene extends Scene { override draw(context): void { context.backend.clear(); - for (const { sprite } of this.sprites) context.render(sprite); + context.render(this.layer); } } From cea0c6dc438375dc2ec1841256d92cd9f0439a2d Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 15:03:12 +0200 Subject: [PATCH 02/12] test(rendering): add cross-call sprite batching regression test (red) --- test/perf/rendering/harness.ts | 47 +++++++++++++++++++ test/perf/rendering/structural-sprite.test.ts | 21 ++++++++- 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/test/perf/rendering/harness.ts b/test/perf/rendering/harness.ts index cd21b523..67da7aae 100644 --- a/test/perf/rendering/harness.ts +++ b/test/perf/rendering/harness.ts @@ -9,6 +9,7 @@ */ import type { RenderNode } from '#rendering/RenderNode'; import type { View } from '#rendering/View'; +import { playRenderTree } from '#rendering/plan/playRenderTree'; import { WebGl2Backend } from '#rendering/webgl2/WebGl2Backend'; import { wireCoreRenderers } from '../../rendering/browser/_coreRenderers'; @@ -161,3 +162,49 @@ export const measureSteadyFrame = (harness: WebGl2Harness, root: RenderNode, war return metrics!; }; + +/** + * Render each node via its own setView + playRenderTree (exactly what + * RenderingContext.render does per call), then flush once — i.e. the + * "one context.render() per drawable in a loop" pattern. Returns the metrics of + * the final warmed frame. 
+ */ +export const measureCrossCallFrame = (harness: WebGl2Harness, nodes: readonly RenderNode[], warmupFrames = 2): FrameMetrics => { + const { backend, recorder } = harness; + let metrics: FrameMetrics | null = null; + + for (let i = 0; i <= warmupFrames; i++) { + backend.resetStats(); + recorder.reset(); + backend.clear(); + + const view = backend.view; + for (const node of nodes) { + backend.setView(view); + playRenderTree(node, backend); + } + backend.flush(); + + const stats = backend.stats; + metrics = { + drawCalls: stats.drawCalls, + batches: stats.batches, + instances: recorder.instances, + visibleNodes: stats.submittedNodes, + culledNodes: stats.culledNodes, + renderPasses: stats.renderPasses, + textureBinds: recorder.textureBinds, + samplerBinds: recorder.samplerBinds, + programChanges: recorder.programChanges, + blendChanges: recorder.blendChanges, + bufferUploads: recorder.bufferUploads, + bufferReallocations: recorder.bufferReallocations, + uploadedBufferBytes: recorder.bufferUploadBytes, + transformRows: recorder.transformRows, + transformUploads: recorder.transformUploads, + transformUploadBytes: recorder.transformUploadBytes, + }; + } + + return metrics!; +}; diff --git a/test/perf/rendering/structural-sprite.test.ts b/test/perf/rendering/structural-sprite.test.ts index 00fd1cb9..9a00c99d 100644 --- a/test/perf/rendering/structural-sprite.test.ts +++ b/test/perf/rendering/structural-sprite.test.ts @@ -14,7 +14,7 @@ import { Sprite } from '#rendering/sprite/Sprite'; import type { BlendModes } from '#rendering/types'; import { buildSpriteScene, makeTextures } from './fixtures'; -import { createWebGl2Harness, measureSteadyFrame, type WebGl2Harness } from './harness'; +import { createWebGl2Harness, measureCrossCallFrame, measureSteadyFrame, type WebGl2Harness } from './harness'; const withHarness = (fn: (harness: WebGl2Harness) => void): void => { const harness = createWebGl2Harness(); @@ -138,6 +138,25 @@ describe('structural — Sprite', () => { }); 
}); + it('1000 per-call renders / 1 texture → one draw (cross-call batching)', () => { + withHarness(harness => { + const [texture] = makeTextures(1); + const sprites = Array.from({ length: 1000 }, (_, i) => { + const sprite = new Sprite(texture); + sprite.setPosition(i % 100, Math.floor(i / 100)); + return sprite; + }); + + const m = measureCrossCallFrame(harness, sprites, 2); + + expect(m.drawCalls).toBe(1); + expect(m.instances).toBe(1000); + expect(m.visibleNodes).toBe(1000); + + for (const sprite of sprites) sprite.destroy(); + }); + }); + it('static transforms skip re-upload; moving transforms re-upload all rows', () => { withHarness(harness => { const staticScene = buildSpriteScene({ count: 500, textures: makeTextures(1) }); From 0775036f732f55b88fe91d8a6eecac6daf8c3bd9 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 15:08:42 +0200 Subject: [PATCH 03/12] perf(rendering): frame-scoped draw-plan lifecycle for cross-call batching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit setView flushes only on real view change; the transform buffer resets once per frame; the plan builder bases node indices at the frame buffer count; nested plans isolate their rows. Per-call renders now batch (1000 -> 1 draw). Leaves the barrier-path allocation gate red — fixed in the next commit. --- src/rendering/TransformBuffer.ts | 24 ++++++++++++++ src/rendering/plan/RenderPlanBuilder.ts | 8 +++-- src/rendering/webgl2/WebGl2Backend.ts | 43 +++++++++++++++++++++---- 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/src/rendering/TransformBuffer.ts b/src/rendering/TransformBuffer.ts index 8482f134..4b7da7f7 100644 --- a/src/rendering/TransformBuffer.ts +++ b/src/rendering/TransformBuffer.ts @@ -94,6 +94,11 @@ export class TransformBuffer { return this._version; } + /** Running content hash of the rows written since begin(). 
@internal */ + public get frameHash(): number { + return this._frameHash; + } + public begin(expectedCount = 0): this { if (expectedCount > 0) { this._ensureCapacity(expectedCount); @@ -117,6 +122,25 @@ export class TransformBuffer { return slot; } + /** + * Rewind the write cursor to `count`, freeing the rows above it for reuse, and + * (optionally) restore the running content hash to its pre-rewind value so the + * freed rows' writes don't linger in the hash and trigger spurious re-uploads. + * Used by nested draw plans (filters / cacheAsBitmap) to isolate their slots. + * @internal + */ + public rewindTo(count: number, frameHash?: number): this { + if (count >= 0 && count < this._count) { + this._count = count; + + if (frameHash !== undefined) { + this._frameHash = frameHash >>> 0; + } + } + + return this; + } + public write(slot: number, transform: Matrix, tint: Color): this { if (!Number.isInteger(slot) || slot < 0) { throw new Error(`TransformBuffer slot must be a non-negative integer (got ${slot}).`); diff --git a/src/rendering/plan/RenderPlanBuilder.ts b/src/rendering/plan/RenderPlanBuilder.ts index db9cbe41..58053f25 100644 --- a/src/rendering/plan/RenderPlanBuilder.ts +++ b/src/rendering/plan/RenderPlanBuilder.ts @@ -93,7 +93,11 @@ export class RenderPlanBuilder { this._barrierEntryPoolCursor = 0; this._scopeStack.length = 0; this._hasPending = false; - this._nodeIndex = 0; + // Base this plan's node indices after whatever earlier render() calls already + // wrote into the frame-scoped transform buffer, so every draw across all + // render() calls in the frame references a distinct slot and can batch. + const frameBase = (backend as { transformBufferCount?: number }).transformBufferCount ?? 
0; + this._nodeIndex = frameBase; const rootScope = this._acquireGroupScope(false); @@ -110,7 +114,7 @@ export class RenderPlanBuilder { }); } - this._plan.nodeCount = this._nodeIndex; + this._plan.nodeCount = this._nodeIndex - frameBase; return this._plan; } diff --git a/src/rendering/webgl2/WebGl2Backend.ts b/src/rendering/webgl2/WebGl2Backend.ts index e58d6a3a..d25f7e42 100644 --- a/src/rendering/webgl2/WebGl2Backend.ts +++ b/src/rendering/webgl2/WebGl2Backend.ts @@ -181,6 +181,8 @@ export class WebGl2Backend implements RenderBackend { private _transformTextureCount = -1; private _activeDrawCommand: DrawCommand | null = null; private _drawPlanDepth = 0; + private readonly _planBaseStack: number[] = []; + private readonly _planHashStack: number[] = []; public constructor(app: Application) { const canvasOptions = app.options.canvas ?? {}; @@ -279,13 +281,27 @@ export class WebGl2Backend implements RenderBackend { public resetStats(): this { resetRenderStats(this._stats); + // The transform buffer is frame-scoped: reset it once per frame here (was + // previously reset per render() call in _beginDrawPlan). + this._transformBuffer.begin(); return this; } + /** Frame-global slot base the plan builder indexes from. @internal */ + public get transformBufferCount(): number { + return this._transformBuffer.count; + } + /** @internal */ - public _beginDrawPlan(nodeCount: number): void { - this._transformBuffer.begin(nodeCount); + public _beginDrawPlan(_nodeCount: number): void { + // Do NOT reset the transform buffer here — it is frame-scoped (reset in + // resetStats). The builder already based this plan's node indices at the + // current buffer count, so writes land in fresh frame-global slots and + // batches survive across render() calls. Remember this plan's base so a + // nested plan can free its rows on end. 
+ this._planBaseStack.push(this._transformBuffer.count); + this._planHashStack.push(this._transformBuffer.frameHash); this._activeDrawCommand = null; this._drawPlanDepth++; } @@ -395,13 +411,23 @@ export class WebGl2Backend implements RenderBackend { public _endDrawPlan(): void { this._activeDrawCommand = null; + const planBase = this._planBaseStack.pop() ?? 0; + const planHash = this._planHashStack.pop() ?? 0; + if (this._drawPlanDepth > 0) { this._drawPlanDepth--; } - // Only assert balance at the outermost plan: cacheAsBitmap draws a cache - // sprite via a nested render(), whose inner _endDrawPlan sees the still-open - // outer clips — those are not leaks. + // A nested plan (filter / cacheAsBitmap) just ended: flush its draws, then + // free its transform rows so the frame-scoped buffer only grows with + // top-level render() calls. Top-level plans (depth back to 0) keep their rows + // so cross-call batching survives to the frame-end flush. + if (this._drawPlanDepth > 0) { + this._flushActiveRenderer(); + this._transformBuffer.rewindTo(planBase, planHash); + } + + // Only assert balance at the outermost plan. if (this._drawPlanDepth === 0) { this._assertBalancedStencil(); } @@ -715,7 +741,12 @@ export class WebGl2Backend implements RenderBackend { } public setView(view: View | null): this { - this._flushActiveRenderer(); + // Only flush the open batch when the view actually changes. The unconditional + // flush forced one draw call per render() call (each render() re-applies the + // same camera view), defeating cross-call batching. 
+ if (this._renderTarget.view !== view) { + this._flushActiveRenderer(); + } this._renderTarget.setView(view); this._bindRenderTarget(this._renderTarget); From fc4c10ce0d4db3396936e1fed4a4d2cf93368c46 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 15:22:52 +0200 Subject: [PATCH 04/12] perf(rendering): delta-upload transform texture rows per flush A frame-scoped buffer made barrier flushes re-upload a growing buffer (O(N^2)). Uploading only [uploadedRows, count) per flush via commitRect makes it O(N) while keeping the cross-frame hash-guard skip. Fixes the effect-barrier gate. --- src/rendering/webgl2/WebGl2Backend.ts | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/rendering/webgl2/WebGl2Backend.ts b/src/rendering/webgl2/WebGl2Backend.ts index d25f7e42..9fc23e1b 100644 --- a/src/rendering/webgl2/WebGl2Backend.ts +++ b/src/rendering/webgl2/WebGl2Backend.ts @@ -183,6 +183,8 @@ export class WebGl2Backend implements RenderBackend { private _drawPlanDepth = 0; private readonly _planBaseStack: number[] = []; private readonly _planHashStack: number[] = []; + /** Rows of the transform texture already uploaded this frame (delta-upload guard). */ + private _uploadedRows = 0; public constructor(app: Application) { const canvasOptions = app.options.canvas ?? {}; @@ -284,6 +286,7 @@ export class WebGl2Backend implements RenderBackend { // The transform buffer is frame-scoped: reset it once per frame here (was // previously reset per render() call in _beginDrawPlan). this._transformBuffer.begin(); + this._uploadedRows = 0; return this; } @@ -425,6 +428,10 @@ export class WebGl2Backend implements RenderBackend { if (this._drawPlanDepth > 0) { this._flushActiveRenderer(); this._transformBuffer.rewindTo(planBase, planHash); + + if (planBase < this._uploadedRows) { + this._uploadedRows = planBase; + } } // Only assert balance at the outermost plan. 
@@ -826,6 +833,7 @@ export class WebGl2Backend implements RenderBackend { }); this._transformTextureHash = 0; this._transformTextureCount = -1; + this._uploadedRows = 0; } const snapshot = this._transformBuffer.commitSnapshot(requiredCount); @@ -836,10 +844,17 @@ export class WebGl2Backend implements RenderBackend { } if (snapshot.changed || snapshot.count !== this._transformTextureCount || snapshot.hash !== this._transformTextureHash) { - nextTransformTexture.commitRect(0, 0, 3, snapshot.count); - this._transformBuffer.recordUpload(snapshot.count); - this._transformTextureHash = snapshot.hash; + const firstRow = Math.min(this._uploadedRows, snapshot.count); + const rowCount = snapshot.count - firstRow; + + if (rowCount > 0) { + nextTransformTexture.commitRect(0, firstRow, 3, rowCount); + this._transformBuffer.recordUpload(rowCount); + } + + this._uploadedRows = snapshot.count; this._transformTextureCount = snapshot.count; + this._transformTextureHash = snapshot.hash; } return this.bindTexture(nextTransformTexture, unit); From f10e36c5a52a7511ea3a32ba2f9f84e7898f2956 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 15:28:12 +0200 Subject: [PATCH 05/12] test(rendering): per-call render output matches Container render --- test/perf/rendering/structural-sprite.test.ts | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/perf/rendering/structural-sprite.test.ts b/test/perf/rendering/structural-sprite.test.ts index 9a00c99d..2ca4440c 100644 --- a/test/perf/rendering/structural-sprite.test.ts +++ b/test/perf/rendering/structural-sprite.test.ts @@ -183,4 +183,26 @@ describe('structural — Sprite', () => { root.destroy(); }); }); + + it('per-call renders match a Container render (same draws, instances, transform rows)', () => { + withHarness(harness => { + const [texture] = makeTextures(1); + + const loose = Array.from({ length: 500 }, (_, i) => { + const sprite = new Sprite(texture); + sprite.setPosition((i * 7) % 640, (i * 13) % 480); 
+ return sprite; + }); + const crossCall = measureCrossCallFrame(harness, loose, 2); + for (const sprite of loose) sprite.destroy(); + + const { root } = buildSpriteScene({ count: 500, textures: makeTextures(1) }); + const container = measureSteadyFrame(harness, root, 2); + root.destroy(); + + expect(crossCall.drawCalls).toBe(container.drawCalls); + expect(crossCall.instances).toBe(container.instances); + expect(crossCall.transformRows).toBe(container.transformRows); + }); + }); }); From fc413efb4ecfcda1357ea5cc5f47ddfd01d28fae Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 15:42:22 +0200 Subject: [PATCH 06/12] fix(rendering): upload exact dirty transform-row range, not a high-water mark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 3's delta upload tracked only the highest uploaded row, so a slot reused below that mark (a filter composite reusing a row a nested plan had rewound) was never re-uploaded, leaving stale transform data — the filter-boundary browser test rendered the wrong color. Track the exact written-slot range [dirtyMin, dirtyMax] in TransformBuffer instead; the delta upload pushes precisely the changed rows regardless of reuse. Restores filter-boundary (browser 149/149), keeps effect-barrier under budget and cross-call batching. --- src/rendering/TransformBuffer.ts | 40 +++++++++++++++++++++++++++ src/rendering/webgl2/WebGl2Backend.ts | 15 +++------- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/rendering/TransformBuffer.ts b/src/rendering/TransformBuffer.ts index 4b7da7f7..f63b0181 100644 --- a/src/rendering/TransformBuffer.ts +++ b/src/rendering/TransformBuffer.ts @@ -38,6 +38,12 @@ export class TransformBuffer { private _skippedWriteCount = 0; private _uploadCount = 0; private _uploadedRecordCount = 0; + // Dirty row range [_dirtyMin, _dirtyMax] written since the last upload — the + // exact rows a delta upload must push. Empty when `_dirtyMax < _dirtyMin`. 
+ // Tracked by slot (not a high-water mark) so a reused slot (nested-plan + // rewind, filter composite) is correctly re-uploaded. + private _dirtyMin = 0; + private _dirtyMax = -1; public get count(): number { return this._count; @@ -110,6 +116,8 @@ export class TransformBuffer { this._skippedWriteCount = 0; this._uploadCount = 0; this._uploadedRecordCount = 0; + this._dirtyMin = 0; + this._dirtyMax = -1; return this; } @@ -141,6 +149,28 @@ export class TransformBuffer { return this; } + /** + * Consume the dirty row range written since the last upload, clamped to + * `[0, maxCount)`, and clear it. Returns the contiguous `[firstRow, firstRow + + * rowCount)` a delta upload should push (`rowCount === 0` when nothing is + * dirty). The backend calls this at its upload boundary. + * @internal + */ + public consumeDirtyRange(maxCount: number): { firstRow: number; rowCount: number } { + if (this._dirtyMax < this._dirtyMin) { + return { firstRow: 0, rowCount: 0 }; + } + + const firstRow = Math.max(0, this._dirtyMin); + const lastRow = Math.min(this._dirtyMax, maxCount - 1); + const rowCount = lastRow >= firstRow ? lastRow - firstRow + 1 : 0; + + this._dirtyMin = 0; + this._dirtyMax = -1; + + return { firstRow, rowCount }; + } + public write(slot: number, transform: Matrix, tint: Color): this { if (!Number.isInteger(slot) || slot < 0) { throw new Error(`TransformBuffer slot must be a non-negative integer (got ${slot}).`); @@ -168,6 +198,16 @@ export class TransformBuffer { this._count = slot + 1; } + // Track the exact written-slot range so a delta upload pushes precisely the + // changed rows — including a slot reused below the high-water mark. 
+ if (this._dirtyMax < this._dirtyMin) { + this._dirtyMin = slot; + this._dirtyMax = slot; + } else { + if (slot < this._dirtyMin) this._dirtyMin = slot; + if (slot > this._dirtyMax) this._dirtyMax = slot; + } + this._frameHash = this._mix(this._frameHash, slot); for (let i = 0; i < floatsPerSlot; i++) { diff --git a/src/rendering/webgl2/WebGl2Backend.ts b/src/rendering/webgl2/WebGl2Backend.ts index 9fc23e1b..87c1a5c5 100644 --- a/src/rendering/webgl2/WebGl2Backend.ts +++ b/src/rendering/webgl2/WebGl2Backend.ts @@ -183,8 +183,6 @@ export class WebGl2Backend implements RenderBackend { private _drawPlanDepth = 0; private readonly _planBaseStack: number[] = []; private readonly _planHashStack: number[] = []; - /** Rows of the transform texture already uploaded this frame (delta-upload guard). */ - private _uploadedRows = 0; public constructor(app: Application) { const canvasOptions = app.options.canvas ?? {}; @@ -286,7 +284,6 @@ export class WebGl2Backend implements RenderBackend { // The transform buffer is frame-scoped: reset it once per frame here (was // previously reset per render() call in _beginDrawPlan). this._transformBuffer.begin(); - this._uploadedRows = 0; return this; } @@ -428,10 +425,6 @@ export class WebGl2Backend implements RenderBackend { if (this._drawPlanDepth > 0) { this._flushActiveRenderer(); this._transformBuffer.rewindTo(planBase, planHash); - - if (planBase < this._uploadedRows) { - this._uploadedRows = planBase; - } } // Only assert balance at the outermost plan. 
@@ -833,7 +826,6 @@ export class WebGl2Backend implements RenderBackend { }); this._transformTextureHash = 0; this._transformTextureCount = -1; - this._uploadedRows = 0; } const snapshot = this._transformBuffer.commitSnapshot(requiredCount); @@ -844,15 +836,16 @@ export class WebGl2Backend implements RenderBackend { } if (snapshot.changed || snapshot.count !== this._transformTextureCount || snapshot.hash !== this._transformTextureHash) { - const firstRow = Math.min(this._uploadedRows, snapshot.count); - const rowCount = snapshot.count - firstRow; + // Upload only the rows actually written since the last upload (delta), so + // barrier-heavy frames don't re-upload the whole growing buffer. A reused + // slot below the high-water mark is in the dirty range, so it re-uploads. + const { firstRow, rowCount } = this._transformBuffer.consumeDirtyRange(snapshot.count); if (rowCount > 0) { nextTransformTexture.commitRect(0, firstRow, 3, rowCount); this._transformBuffer.recordUpload(rowCount); } - this._uploadedRows = snapshot.count; this._transformTextureCount = snapshot.count; this._transformTextureHash = snapshot.hash; } From 52d943906eb99ffef11427eebec9a5583d97aec4 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 16:12:33 +0200 Subject: [PATCH 07/12] perf(rendering): WebGPU parity for frame-scoped cross-call batching Mirror the WebGl2 backend's Tasks 2-4 lifecycle changes onto WebGPU: - TransformBuffer is now frame-scoped (reset in resetStats, not per plan) - Add transformBufferCount getter so RenderPlanBuilder offsets node indices correctly for WebGPU (previously fell back to 0 -> no cross-call batching) - _beginDrawPlan: push base/hash stacks instead of resetting; reserve is based on frame-global count + plan nodes to avoid mid-frame reallocations - _endDrawPlan: pop stacks; nested plans flush + rewindTo to free their rows - setView: conditional flush (only on real view change) to stop breaking batches on every render() call that re-applies the same 
camera view - WebGpuTransformStorage.getBuffer: delta upload via consumeDirtyRange instead of full-buffer writeBuffer on every flush boundary --- src/rendering/webgpu/WebGpuBackend.ts | 48 ++++++++++++++++--- .../webgpu/WebGpuTransformStorage.ts | 22 +++++++-- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/src/rendering/webgpu/WebGpuBackend.ts b/src/rendering/webgpu/WebGpuBackend.ts index 5d91790e..2a3f187a 100644 --- a/src/rendering/webgpu/WebGpuBackend.ts +++ b/src/rendering/webgpu/WebGpuBackend.ts @@ -127,6 +127,8 @@ export class WebGpuBackend implements RenderBackend { private _activeDrawCommand: DrawCommand | null = null; private _passCoordinatorInstance: WebGpuPassCoordinator | null = null; private _drawPlanDepth = 0; + private readonly _planBaseStack: number[] = []; + private readonly _planHashStack: number[] = []; public constructor(app: Application) { const canvasOptions = app.options.canvas ?? {}; @@ -243,22 +245,37 @@ export class WebGpuBackend implements RenderBackend { public resetStats(): this { resetRenderStats(this._stats); + // The transform buffer is frame-scoped: reset it once per frame here (was + // previously reset per render() call in _beginDrawPlan). + this._getTransformStorage().buffer.begin(); return this; } + /** Frame-global slot base the plan builder indexes from. @internal */ + public get transformBufferCount(): number { + return this._getTransformStorage().buffer.count; + } + /** @internal */ public _beginDrawPlan(nodeCount: number): void { const storage = this._getTransformStorage(); - storage.begin(nodeCount); + // Do NOT reset the transform buffer here — it is frame-scoped (reset in + // resetStats). The builder already based this plan's node indices at the + // current buffer count, so writes land in fresh frame-global slots and + // batches survive across render() calls. Remember this plan's base so a + // nested plan can free its rows on end. 
+ this._planBaseStack.push(storage.buffer.count); + this._planHashStack.push(storage.buffer.frameHash); // Pre-allocate the GPU storage buffer for the full plan before any group - // flush runs. Without this, a later flush with a higher maxNodeIndex would - // destroy and replace the buffer mid-frame while earlier command buffers - // may still reference the old allocation. - if (nodeCount > 0 && this._device !== null && !this._deviceLost) { - storage.reserve(this._device, nodeCount, this._accountant); + // flush runs. Base the reservation on the frame-global count + this plan's + // nodes so the buffer grows to cover both pre-existing frame rows and new rows. + const reserveCount = storage.buffer.count + nodeCount; + + if (reserveCount > 0 && this._device !== null && !this._deviceLost) { + storage.reserve(this._device, reserveCount, this._accountant); } this._activeDrawCommand = null; @@ -311,10 +328,22 @@ export class WebGpuBackend implements RenderBackend { public _endDrawPlan(): void { this._activeDrawCommand = null; + const planBase = this._planBaseStack.pop() ?? 0; + const planHash = this._planHashStack.pop() ?? 0; + if (this._drawPlanDepth > 0) { this._drawPlanDepth--; } + // A nested plan (filter / cacheAsBitmap) just ended: flush its draws, then + // free its transform rows so the frame-scoped buffer only grows with + // top-level render() calls. Top-level plans (depth back to 0) keep their rows + // so cross-call batching survives to the frame-end flush. + if (this._drawPlanDepth > 0) { + this._flushActiveRenderer(); + this._getTransformStorage().buffer.rewindTo(planBase, planHash); + } + // Only assert balance at the outermost plan: a nested render() (e.g. // cacheAsBitmap drawing its cache sprite) sees the still-open outer clips, // which are not leaks. 
@@ -594,7 +623,12 @@ export class WebGpuBackend implements RenderBackend { } public setView(view: View | null): this { - this._flushActiveRenderer(); + // Only flush the open batch when the view actually changes. The unconditional + // flush forced one draw call per render() call (each render() re-applies the + // same camera view), defeating cross-call batching. + if (this._renderTarget.view !== view) { + this._flushActiveRenderer(); + } this._renderTarget.setView(view); return this; diff --git a/src/rendering/webgpu/WebGpuTransformStorage.ts b/src/rendering/webgpu/WebGpuTransformStorage.ts index 72a3c9d8..837e2704 100644 --- a/src/rendering/webgpu/WebGpuTransformStorage.ts +++ b/src/rendering/webgpu/WebGpuTransformStorage.ts @@ -99,11 +99,25 @@ export class WebGpuTransformStorage { } if (snapshot.changed || snapshot.hash !== this._storageHash || snapshot.count !== this._storageCount) { - const bytes = snapshot.count * slotFloatCount * Float32Array.BYTES_PER_ELEMENT; + // Upload only the rows actually written since the last upload (delta), so + // barrier-heavy frames don't re-upload the whole growing buffer. A reused + // slot below the high-water mark is in the dirty range, so it re-uploads. 
+ const { firstRow, rowCount } = this._buffer.consumeDirtyRange(snapshot.count); + + if (rowCount > 0) { + const slotBytes = slotFloatCount * Float32Array.BYTES_PER_ELEMENT; + + device.queue.writeBuffer( + this._storageBuffer!, + firstRow * slotBytes, + this._buffer.data.buffer, + this._buffer.data.byteOffset + firstRow * slotBytes, + rowCount * slotBytes, + ); + this._buffer.recordUpload(rowCount); + this._accountant?.recordBufferUpload(rowCount * slotBytes); + } - device.queue.writeBuffer(this._storageBuffer!, 0, this._buffer.data.buffer, this._buffer.data.byteOffset, bytes); - this._buffer.recordUpload(snapshot.count); - this._accountant?.recordBufferUpload(bytes); this._storageHash = snapshot.hash; this._storageCount = snapshot.count; } From 4d69cee54ce39837c32eef75cf8386f1e110627f Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 16:29:56 +0200 Subject: [PATCH 08/12] fix(webgpu): full re-upload after storage-buffer grow; remove dead begin() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After _growBuffer creates a new empty GPUBuffer, set _needsFullUpload=true. In getBuffer, always consumeDirtyRange first (clears stale range), then branch: full [0,count) upload when _needsFullUpload, else delta rowCount>0. Mirrors WebGl2's full-upload-on-grow so mid-frame reallocated slots are never read as uninitialized transforms by the shader. Also removes the dead begin(nodeCount) wrapper — callers use buffer.begin() directly. 
--- .../webgpu/WebGpuTransformStorage.ts | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/rendering/webgpu/WebGpuTransformStorage.ts b/src/rendering/webgpu/WebGpuTransformStorage.ts index 837e2704..957061b9 100644 --- a/src/rendering/webgpu/WebGpuTransformStorage.ts +++ b/src/rendering/webgpu/WebGpuTransformStorage.ts @@ -14,6 +14,7 @@ export class WebGpuTransformStorage { private _storageCapacity = 0; private _storageHash = 0; private _storageCount = -1; + private _needsFullUpload = false; private _accountant: GpuResourceAccountant | null = null; /** GPU bytes currently booked for the storage buffer with the resource accountant. */ private _accountedBytes = 0; @@ -27,10 +28,6 @@ export class WebGpuTransformStorage { return this._buffer; } - public begin(nodeCount: number): void { - this._buffer.begin(nodeCount); - } - public writeCommand(command: DrawCommand, transform?: Matrix): void { const drawable = command.drawable; @@ -99,14 +96,28 @@ export class WebGpuTransformStorage { } if (snapshot.changed || snapshot.hash !== this._storageHash || snapshot.count !== this._storageCount) { - // Upload only the rows actually written since the last upload (delta), so - // barrier-heavy frames don't re-upload the whole growing buffer. A reused - // slot below the high-water mark is in the dirty range, so it re-uploads. + // Always consume the dirty range first to clear it, regardless of which upload + // path runs — a stale dirty range must never leak into the next flush. const { firstRow, rowCount } = this._buffer.consumeDirtyRange(snapshot.count); - if (rowCount > 0) { - const slotBytes = slotFloatCount * Float32Array.BYTES_PER_ELEMENT; + const slotBytes = slotFloatCount * Float32Array.BYTES_PER_ELEMENT; + if (this._needsFullUpload) { + // Post-grow: the new GPUBuffer is empty; upload the full [0, snapshot.count) + // range so rows already consumed by earlier flushes this frame are present. 
+ device.queue.writeBuffer( + this._storageBuffer!, + 0, + this._buffer.data.buffer, + this._buffer.data.byteOffset, + snapshot.count * slotBytes, + ); + this._buffer.recordUpload(snapshot.count); + this._accountant?.recordBufferUpload(snapshot.count * slotBytes); + this._needsFullUpload = false; + } else if (rowCount > 0) { + // Normal delta path: upload only the rows written since the last upload. + // A reused slot below the high-water mark is in the dirty range, so it re-uploads. device.queue.writeBuffer( this._storageBuffer!, firstRow * slotBytes, @@ -156,6 +167,7 @@ export class WebGpuTransformStorage { this._storageCapacity = nextCapacity; this._storageHash = 0; this._storageCount = -1; + this._needsFullUpload = true; // Re-book the storage footprint (free the prior buffer's bytes, allocate the new). this._accountedBytes = this._accountant?.reallocate(this._accountedBytes, nextCapacity) ?? this._accountedBytes; } From ac08b290726b4eb9e82569b1e3d89d860a40faf3 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 16:47:58 +0200 Subject: [PATCH 09/12] fix(webgpu): restore begin() wrapper + fix RT-display test for frame-scoped slots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior commit removed WebGpuTransformStorage.begin() as dead code, but it has ~25 test call sites (30 tests broke). Restore it. Also the webgpu-backend RenderTexture+Sprite test asserted the sprite transform in slot 0, but with frame-scoped batching the graphics-into-RT is slot 0 and the sprite lands in slot 1 — read slot 1 (transform verified: tx=24, ty=18 present after the full-upload-on-grow). Full exojs project green (2510); no other regressions. Process note: the exojs unit project (test/**) was not run during the earlier tasks — only rendering-perf + browser-webgl; this surfaced both issues. 
--- src/rendering/webgpu/WebGpuTransformStorage.ts | 5 +++++ test/rendering/webgpu-backend.test.ts | 17 +++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/rendering/webgpu/WebGpuTransformStorage.ts b/src/rendering/webgpu/WebGpuTransformStorage.ts index 957061b9..f03a59fe 100644 --- a/src/rendering/webgpu/WebGpuTransformStorage.ts +++ b/src/rendering/webgpu/WebGpuTransformStorage.ts @@ -28,6 +28,11 @@ export class WebGpuTransformStorage { return this._buffer; } + /** Reset the underlying frame-scoped buffer. Used directly by tests. @internal */ + public begin(nodeCount = 0): void { + this._buffer.begin(nodeCount); + } + public writeCommand(command: DrawCommand, transform?: Matrix): void { const drawable = command.drawable; diff --git a/test/rendering/webgpu-backend.test.ts b/test/rendering/webgpu-backend.test.ts index d0e7c284..0e057969 100644 --- a/test/rendering/webgpu-backend.test.ts +++ b/test/rendering/webgpu-backend.test.ts @@ -1790,10 +1790,15 @@ describe('WebGpuBackend', () => { manager.flush(); manager.destroy(); - // The sprite's world transform now lives in the shared transform storage - // buffer (uploaded as the last writeBuffer of the sprite flush), not inline - // in the instance buffer. Slot 0 = (a, b, c, d, tx, ty, 0, 0, tint…); an + // The sprite's world transform lives in the shared transform storage buffer + // (the last writeBuffer of the sprite flush carries the whole buffer's + // ArrayBuffer), not inline in the instance buffer. The buffer is frame-scoped + // (cross-call batching): the graphics rendered into the RenderTexture is the + // first shared-buffer write (slot 0), so the sprite is the second and lands + // in slot 1. Each slot is 12 floats (a, b, c, d, tx, ty, 0, 0, tint…); an // unrotated sprite at (24, 18) has b == 0 and carries that translation. 
+ const slotFloats = 12; + const spriteBase = 1 * slotFloats; // slot 1 const transformWrite = environment.queue.writeBuffer.mock.calls[environment.queue.writeBuffer.mock.calls.length - 1]; const data = new Float32Array(transformWrite[2] as ArrayBuffer); @@ -1801,9 +1806,9 @@ describe('WebGpuBackend', () => { expect(environment.pass.drawIndexed).toHaveBeenCalled(); expect(environment.queue.submit.mock.calls.length).toBeGreaterThanOrEqual(2); expect(environment.textures.length).toBeGreaterThan(0); - expect(data[1]).toBe(0); - expect(data[4]).toBe(24); - expect(data[5]).toBe(18); + expect(data[spriteBase + 1]).toBe(0); + expect(data[spriteBase + 4]).toBe(24); + expect(data[spriteBase + 5]).toBe(18); } finally { environment.restore(); } From afb0ec71598ed91ec2a0b76fb90222fb6c562af6 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 16:54:53 +0200 Subject: [PATCH 10/12] chore(rendering): fix import order + prettier formatting Autofix: sort the playRenderTree import in the perf harness; prettier-format WebGpuTransformStorage after the begin() restore. verify:quick green. --- src/rendering/webgpu/WebGpuTransformStorage.ts | 8 +------- test/perf/rendering/harness.ts | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/rendering/webgpu/WebGpuTransformStorage.ts b/src/rendering/webgpu/WebGpuTransformStorage.ts index f03a59fe..5726bfc2 100644 --- a/src/rendering/webgpu/WebGpuTransformStorage.ts +++ b/src/rendering/webgpu/WebGpuTransformStorage.ts @@ -110,13 +110,7 @@ export class WebGpuTransformStorage { if (this._needsFullUpload) { // Post-grow: the new GPUBuffer is empty; upload the full [0, snapshot.count) // range so rows already consumed by earlier flushes this frame are present. 
- device.queue.writeBuffer( - this._storageBuffer!, - 0, - this._buffer.data.buffer, - this._buffer.data.byteOffset, - snapshot.count * slotBytes, - ); + device.queue.writeBuffer(this._storageBuffer!, 0, this._buffer.data.buffer, this._buffer.data.byteOffset, snapshot.count * slotBytes); this._buffer.recordUpload(snapshot.count); this._accountant?.recordBufferUpload(snapshot.count * slotBytes); this._needsFullUpload = false; diff --git a/test/perf/rendering/harness.ts b/test/perf/rendering/harness.ts index 67da7aae..2af75114 100644 --- a/test/perf/rendering/harness.ts +++ b/test/perf/rendering/harness.ts @@ -7,9 +7,9 @@ * * @internal Test/perf-only. */ +import { playRenderTree } from '#rendering/plan/playRenderTree'; import type { RenderNode } from '#rendering/RenderNode'; import type { View } from '#rendering/View'; -import { playRenderTree } from '#rendering/plan/playRenderTree'; import { WebGl2Backend } from '#rendering/webgl2/WebGl2Backend'; import { wireCoreRenderers } from '../../rendering/browser/_coreRenderers'; From c9b34ff380e24a01fdad15f524a3d02395afd8d8 Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 17:00:40 +0200 Subject: [PATCH 11/12] test(particles): seed frame-scoped batching stacks in WebGpuBackend mock The particle GPU-injection test mocks the backend via Object.create(prototype), bypassing the constructor that initializes _planBaseStack/_planHashStack (used by _beginDrawPlan since the cross-call batching work). Seed them like the existing device mock. Full test suite green (3609). 
--- packages/exojs-particles/test/particle-gpu.test.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/exojs-particles/test/particle-gpu.test.ts b/packages/exojs-particles/test/particle-gpu.test.ts index 333945d0..93296074 100644 --- a/packages/exojs-particles/test/particle-gpu.test.ts +++ b/packages/exojs-particles/test/particle-gpu.test.ts @@ -384,6 +384,10 @@ describe('ParticleSystem render-inject backend detection', () => { const env = makeMockDevice(); const fakeBackend = Object.create(WebGpuBackend.prototype) as object; Object.defineProperty(fakeBackend, 'device', { value: env.device, configurable: true }); + // Frame-scoped batching uses these instance stacks in _beginDrawPlan/_endDrawPlan; + // Object.create bypasses the constructor that initializes them, so seed them here. + Object.defineProperty(fakeBackend, '_planBaseStack', { value: [], configurable: true }); + Object.defineProperty(fakeBackend, '_planHashStack', { value: [], configurable: true }); const system = new ParticleSystem(makeTexture(), { capacity: 4 }); system.addUpdateModule(new ApplyForce(0, 0)); From 9fe903a7bf680165f0919e9caa7dbb9aaea50a2f Mon Sep 17 00:00:00 2001 From: Exoridus Date: Sun, 28 Jun 2026 17:34:36 +0200 Subject: [PATCH 12/12] docs(rendering): update stale comments + add TransformBuffer dirty-range tests - RenderingContext: setView flushes only on view change (not unconditionally); correctness rests on trailing flush() and renderer-switch flushes - RenderInstruction: nodeIndex is frame-global [frameBase, frameBase+nodeCount), not plan-local [0, nodeCount) - WebGpuTransformStorage: clarify consumeDirtyRange is inside the upload branch only; add upload-guard note explaining why a skipped flush is safe - WebGl2Backend: same upload-guard safety note as WebGpu counterpart - test: 6 new TransformBuffer dirty-range cases (consumeDirtyRange sentinel, coverage+self-clearing, below-HWM reuse, clamping, rewindTo, begin reset) --- src/rendering/RenderingContext.ts | 17 ++-- 
src/rendering/plan/RenderInstruction.ts | 7 +- src/rendering/webgl2/WebGl2Backend.ts | 4 + .../webgpu/WebGpuTransformStorage.ts | 10 ++- test/rendering/transform-buffer.test.ts | 87 +++++++++++++++++++ 5 files changed, 113 insertions(+), 12 deletions(-) diff --git a/src/rendering/RenderingContext.ts b/src/rendering/RenderingContext.ts index 7e9b8a1b..2c66b99c 100644 --- a/src/rendering/RenderingContext.ts +++ b/src/rendering/RenderingContext.ts @@ -261,11 +261,10 @@ export class RenderingContext implements System { const view = options.view ?? this._camera; const mesh = (this._immediateMesh ??= new ImmediateMesh()); - // Set the view first: this flushes whatever renderer a prior render() / - // drawGeometry left pending, so the shared transform buffer is free for this - // draw's synthetic slot and the pooled mesh is safe to reconfigure. The - // immediate flush below then keeps a later drawGeometry from observing this - // pooled mesh through a still-deferred draw. + // Set the view first: setView now only flushes when the view actually changes + // (not unconditionally). Correctness here rests on (a) the trailing flush() + // below — so a later drawGeometry cannot observe this pooled mesh through a + // still-deferred draw — and (b) any renderer switch flushing its pending batch. this._backend.setView(view); mesh.configure(geometry, transform, material, options.tint ?? null); this._backend.draw(mesh); @@ -302,9 +301,11 @@ export class RenderingContext implements System { const view = options.view ?? this._camera; const mesh = (this._batchMesh ??= new ImmediateMesh()); - // Set the view first (flushing any renderer left pending), configure the - // pooled geometry/look source, then submit a single instanced draw over the - // batch's per-instance transforms/tints and flush it immediately. 
+ // Set the view first (setView only flushes when the view actually changes; + // correctness rests on the trailing flush() below and on any renderer switch + // flushing its pending batch), configure the pooled geometry/look source, + // then submit a single instanced draw over the batch's per-instance + // transforms/tints and flush it immediately. this._backend.setView(view); mesh.configureBatchSource(batch.geometry, batch.material); this._backend.drawInstanced(mesh, batch._instanceTransforms, batch._instanceTints, batch.count); diff --git a/src/rendering/plan/RenderInstruction.ts b/src/rendering/plan/RenderInstruction.ts index 27132776..91161945 100644 --- a/src/rendering/plan/RenderInstruction.ts +++ b/src/rendering/plan/RenderInstruction.ts @@ -9,8 +9,11 @@ import type { GroupScope } from './RenderScope'; * names the concept the plan player consumes and that the batching layer * reorders, independent of how the draw happens to be stored in the scope * tree. Future {@link TransformBuffer} slotting keys on each instruction's - * stable {@link DrawCommand.nodeIndex} (within the `[0, plan.nodeCount)` - * slot space). + * stable {@link DrawCommand.nodeIndex}. Each index is frame-global — + * `[frameBase, frameBase + plan.nodeCount)` — because the transform buffer + * is frame-scoped and the builder bases node indices at the current buffer + * slot count (`frameBase`) so every plan in the frame occupies distinct + * slots and can batch cross-call. 
* * Batch units (maximal runs of consecutive instructions in a {@link GroupScope} * sharing GPU pipeline/bind state) are not materialized: the plan player walks diff --git a/src/rendering/webgl2/WebGl2Backend.ts b/src/rendering/webgl2/WebGl2Backend.ts index 87c1a5c5..c9e660ba 100644 --- a/src/rendering/webgl2/WebGl2Backend.ts +++ b/src/rendering/webgl2/WebGl2Backend.ts @@ -835,6 +835,10 @@ export class WebGl2Backend implements RenderBackend { throw new Error('Transform texture must be initialized before binding.'); } + // A skipped flush (all three guards false) leaves the dirty range uncleared + // until the next begin(). Safe: every write() mixes its slot into _frameHash, + // so a non-empty dirty range always coincides with snapshot.changed = true — + // the upload branch is always taken before any dirty rows could be stale. if (snapshot.changed || snapshot.count !== this._transformTextureCount || snapshot.hash !== this._transformTextureHash) { // Upload only the rows actually written since the last upload (delta), so // barrier-heavy frames don't re-upload the whole growing buffer. A reused diff --git a/src/rendering/webgpu/WebGpuTransformStorage.ts b/src/rendering/webgpu/WebGpuTransformStorage.ts index 5726bfc2..529e3e0c 100644 --- a/src/rendering/webgpu/WebGpuTransformStorage.ts +++ b/src/rendering/webgpu/WebGpuTransformStorage.ts @@ -100,9 +100,15 @@ export class WebGpuTransformStorage { this._growBuffer(device, requiredBytes); } + // A skipped flush (all three guards false) leaves the dirty range uncleared + // until the next begin(). Safe: every write() mixes its slot into _frameHash, + // so a non-empty dirty range always coincides with snapshot.changed = true — + // the upload branch is always taken before any dirty rows could be stale. 
if (snapshot.changed || snapshot.hash !== this._storageHash || snapshot.count !== this._storageCount) { - // Always consume the dirty range first to clear it, regardless of which upload - // path runs — a stale dirty range must never leak into the next flush. + // Always consume the dirty range first to clear it — regardless of whether + // the full-upload path (post-grow) or the delta path runs below. Both paths + // are inside this if-branch; the skip case (snapshot unchanged) never reaches + // here, so the dirty range is only consumed when an upload is actually issued. const { firstRow, rowCount } = this._buffer.consumeDirtyRange(snapshot.count); const slotBytes = slotFloatCount * Float32Array.BYTES_PER_ELEMENT; diff --git a/test/rendering/transform-buffer.test.ts b/test/rendering/transform-buffer.test.ts index 5d5d5d30..ed74a847 100644 --- a/test/rendering/transform-buffer.test.ts +++ b/test/rendering/transform-buffer.test.ts @@ -168,4 +168,91 @@ describe('TransformBuffer', () => { parent.destroy(); }); + + test('consumeDirtyRange returns empty sentinel on a fresh buffer after begin()', () => { + const buffer = new TransformBuffer(); + + buffer.begin(); + const result = buffer.consumeDirtyRange(10); + + expect(result.rowCount).toBe(0); + expect(result.firstRow).toBe(0); + }); + + test('consumeDirtyRange covers all written slots and clears itself on second call', () => { + const buffer = new TransformBuffer(); + const identity = new Matrix(); + + buffer.begin(); + buffer.write(0, identity, Color.white); + buffer.write(1, identity, Color.white); + buffer.write(2, identity, Color.white); + + const first = buffer.consumeDirtyRange(3); + + expect(first).toEqual({ firstRow: 0, rowCount: 3 }); + + const second = buffer.consumeDirtyRange(3); + + expect(second.rowCount).toBe(0); + }); + + test('consumeDirtyRange tracks reuse below the high-water mark', () => { + const buffer = new TransformBuffer(); + const identity = new Matrix(); + + buffer.begin(); + buffer.write(0, 
identity, Color.white); + buffer.write(1, identity, Color.white); + buffer.write(2, identity, Color.white); + buffer.consumeDirtyRange(3); // clear after first writes + + buffer.write(1, identity, Color.white); // reuse slot 1 below high-water mark + + const result = buffer.consumeDirtyRange(3); + + expect(result).toEqual({ firstRow: 1, rowCount: 1 }); + }); + + test('consumeDirtyRange clamps to maxCount — a write above the limit is excluded', () => { + const buffer = new TransformBuffer(); + const identity = new Matrix(); + + buffer.begin(); + buffer.write(5, identity, Color.white); // slot 5 is above maxCount = 3 + + const result = buffer.consumeDirtyRange(3); + + expect(result.rowCount).toBe(0); + }); + + test('rewindTo restores the write cursor and optionally the frame hash', () => { + const buffer = new TransformBuffer(); + const identity = new Matrix(); + + buffer.begin(); + buffer.write(0, identity, Color.white); + const savedHash = buffer.frameHash; + + buffer.write(1, identity, Color.white); + buffer.rewindTo(1, savedHash); + + expect(buffer.count).toBe(1); + expect(buffer.frameHash).toBe(savedHash); + }); + + test('begin() resets the dirty range so consumeDirtyRange returns empty', () => { + const buffer = new TransformBuffer(); + const identity = new Matrix(); + + buffer.begin(); + buffer.write(0, identity, Color.white); + buffer.write(1, identity, Color.white); + + buffer.begin(); // should reset dirty range + + const result = buffer.consumeDirtyRange(10); + + expect(result.rowCount).toBe(0); + }); });