Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions examples/debug-layer/performance-overlay.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Auto-generated from performance-overlay.ts — edit the .ts source, not this file.
import { Application, Color, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs';
import { Application, Color, Container, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs';
import { DebugOverlay } from '@codexo/exojs/debug';
const app = new Application({
canvas: {
Expand All @@ -17,14 +17,21 @@ const debug = new DebugOverlay(app);
debug.layers.performance.visible = true;
class PerformanceOverlayScene extends Scene {
sprites;
layer;
async load(loader) {
await loader.load(Texture, { bunny: 'image/ship-a.png' });
}
init(loader) {
const { width, height } = this.app.canvas;
// All sprites share one texture, so adding them to a single container and
// rendering it once lets the renderer batch them into a single draw call.
// Rendering each sprite with its own `context.render(sprite)` call would
// instead emit one draw call per sprite and tank the frame rate.
this.layer = new Container();
this.sprites = Array.from({ length: 1600 }, () => {
const sprite = new Sprite(loader.get(Texture, 'bunny')).setAnchor(0.5).setScale(0.25);
sprite.setPosition(Math.random() * width, Math.random() * height);
this.layer.addChild(sprite);
return {
sprite,
vx: (Math.random() - 0.5) * 120,
Expand All @@ -47,8 +54,7 @@ class PerformanceOverlayScene extends Scene {
}
draw(context) {
context.backend.clear();
for (const { sprite } of this.sprites)
context.render(sprite);
context.render(this.layer);
}
}
app.start(new PerformanceOverlayScene());
11 changes: 9 additions & 2 deletions examples/debug-layer/performance-overlay.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Application, Color, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs';
import { Application, Color, Container, Keyboard, Scene, Sprite, Texture } from '@codexo/exojs';
import { DebugOverlay } from '@codexo/exojs/debug';

const app = new Application({
Expand All @@ -19,6 +19,7 @@ debug.layers.performance.visible = true;

class PerformanceOverlayScene extends Scene {
private sprites!: { sprite: Sprite; vx: number; vy: number }[];
private layer!: Container;

override async load(loader): Promise<void> {
await loader.load(Texture, { bunny: 'image/ship-a.png' });
Expand All @@ -27,9 +28,15 @@ class PerformanceOverlayScene extends Scene {
override init(loader): void {
const { width, height } = this.app.canvas;

// All sprites share one texture, so adding them to a single container and
// rendering it once lets the renderer batch them into a single draw call.
// Rendering each sprite with its own `context.render(sprite)` call would
// instead emit one draw call per sprite and tank the frame rate.
this.layer = new Container();
this.sprites = Array.from({ length: 1600 }, () => {
const sprite = new Sprite(loader.get(Texture, 'bunny')).setAnchor(0.5).setScale(0.25);
sprite.setPosition(Math.random() * width, Math.random() * height);
this.layer.addChild(sprite);
return {
sprite,
vx: (Math.random() - 0.5) * 120,
Expand All @@ -53,7 +60,7 @@ class PerformanceOverlayScene extends Scene {

override draw(context): void {
context.backend.clear();
for (const { sprite } of this.sprites) context.render(sprite);
context.render(this.layer);
}
}

Expand Down
4 changes: 4 additions & 0 deletions packages/exojs-particles/test/particle-gpu.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,10 @@ describe('ParticleSystem render-inject backend detection', () => {
const env = makeMockDevice();
const fakeBackend = Object.create(WebGpuBackend.prototype) as object;
Object.defineProperty(fakeBackend, 'device', { value: env.device, configurable: true });
// Frame-scoped batching uses these instance stacks in _beginDrawPlan/_endDrawPlan;
// Object.create bypasses the constructor that initializes them, so seed them here.
Object.defineProperty(fakeBackend, '_planBaseStack', { value: [], configurable: true });
Object.defineProperty(fakeBackend, '_planHashStack', { value: [], configurable: true });

const system = new ParticleSystem(makeTexture(), { capacity: 4 });
system.addUpdateModule(new ApplyForce(0, 0));
Expand Down
17 changes: 9 additions & 8 deletions src/rendering/RenderingContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,11 +261,10 @@ export class RenderingContext implements System {
const view = options.view ?? this._camera;
const mesh = (this._immediateMesh ??= new ImmediateMesh());

// Set the view first: this flushes whatever renderer a prior render() /
// drawGeometry left pending, so the shared transform buffer is free for this
// draw's synthetic slot and the pooled mesh is safe to reconfigure. The
// immediate flush below then keeps a later drawGeometry from observing this
// pooled mesh through a still-deferred draw.
// Set the view first: setView now only flushes when the view actually changes
// (not unconditionally). Correctness here rests on (a) the trailing flush()
// below — so a later drawGeometry cannot observe this pooled mesh through a
// still-deferred draw — and (b) any renderer switch flushing its pending batch.
this._backend.setView(view);
mesh.configure(geometry, transform, material, options.tint ?? null);
this._backend.draw(mesh);
Expand Down Expand Up @@ -302,9 +301,11 @@ export class RenderingContext implements System {
const view = options.view ?? this._camera;
const mesh = (this._batchMesh ??= new ImmediateMesh());

// Set the view first (flushing any renderer left pending), configure the
// pooled geometry/look source, then submit a single instanced draw over the
// batch's per-instance transforms/tints and flush it immediately.
// Set the view first (setView only flushes when the view actually changes;
// correctness rests on the trailing flush() below and on any renderer switch
// flushing its pending batch), configure the pooled geometry/look source,
// then submit a single instanced draw over the batch's per-instance
// transforms/tints and flush it immediately.
this._backend.setView(view);
mesh.configureBatchSource(batch.geometry, batch.material);
this._backend.drawInstanced(mesh, batch._instanceTransforms, batch._instanceTints, batch.count);
Expand Down
64 changes: 64 additions & 0 deletions src/rendering/TransformBuffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ export class TransformBuffer {
private _skippedWriteCount = 0;
private _uploadCount = 0;
private _uploadedRecordCount = 0;
// Dirty row range [_dirtyMin, _dirtyMax] written since the last upload — the
// exact rows a delta upload must push. Empty when `_dirtyMax < _dirtyMin`.
// Tracked by slot (not a high-water mark) so a reused slot (nested-plan
// rewind, filter composite) is correctly re-uploaded.
private _dirtyMin = 0;
private _dirtyMax = -1;

public get count(): number {
return this._count;
Expand Down Expand Up @@ -94,6 +100,11 @@ export class TransformBuffer {
return this._version;
}

/**
 * Running content hash of the rows written since begin(). `write()` folds each
 * written slot into it, and `rewindTo()` may restore it to an earlier value
 * when a caller passes a saved hash.
 * @internal
 */
public get frameHash(): number {
return this._frameHash;
}

public begin(expectedCount = 0): this {
if (expectedCount > 0) {
this._ensureCapacity(expectedCount);
Expand All @@ -105,6 +116,8 @@ export class TransformBuffer {
this._skippedWriteCount = 0;
this._uploadCount = 0;
this._uploadedRecordCount = 0;
this._dirtyMin = 0;
this._dirtyMax = -1;

return this;
}
Expand All @@ -117,6 +130,47 @@ export class TransformBuffer {
return slot;
}

/**
 * Move the write cursor back to `count` so the rows at and above it can be
 * written again.
 *
 * The rewind only takes effect when `count` is non-negative and strictly below
 * the current count; any other value (including the current count itself)
 * leaves the buffer untouched. When it does take effect and `frameHash` is
 * supplied, the running content hash is replaced by that value coerced to an
 * unsigned 32-bit integer, so writes to the discarded rows no longer
 * contribute to the hash.
 *
 * @param count - Cursor position to rewind to.
 * @param frameHash - Optional hash value to restore alongside the cursor.
 * @returns This buffer, for chaining.
 * @internal
 */
public rewindTo(count: number, frameHash?: number): this {
    const canRewind = count >= 0 && count < this._count;

    // Out-of-range (or no-op) requests change nothing — not even the hash.
    if (!canRewind) {
        return this;
    }

    this._count = count;

    if (frameHash === undefined) {
        return this;
    }

    this._frameHash = frameHash >>> 0;

    return this;
}

/**
 * Take the range of rows written since the last upload and reset the tracker
 * to "empty".
 *
 * The tracked range `[_dirtyMin, _dirtyMax]` is clamped to `[0, maxCount)`
 * before being returned. If nothing is tracked, `{ firstRow: 0, rowCount: 0 }`
 * is returned and the tracker is left as it was. If rows are tracked but the
 * clamp removes all of them, `rowCount` is `0` and the tracker is still
 * cleared.
 *
 * @param maxCount - Exclusive upper bound on the rows that may be reported.
 * @returns The first dirty row and the number of contiguous rows to upload.
 * @internal
 */
public consumeDirtyRange(maxCount: number): { firstRow: number; rowCount: number } {
    const trackedMin = this._dirtyMin;
    const trackedMax = this._dirtyMax;

    // An inverted range is the "nothing dirty" sentinel.
    if (trackedMax < trackedMin) {
        return { firstRow: 0, rowCount: 0 };
    }

    // Clear the tracker up front; the snapshot above is all we need from it.
    this._dirtyMin = 0;
    this._dirtyMax = -1;

    const firstRow = Math.max(0, trackedMin);
    const lastRow = Math.min(trackedMax, maxCount - 1);

    let rowCount = 0;

    if (lastRow >= firstRow) {
        rowCount = lastRow - firstRow + 1;
    }

    return { firstRow, rowCount };
}

public write(slot: number, transform: Matrix, tint: Color): this {
if (!Number.isInteger(slot) || slot < 0) {
throw new Error(`TransformBuffer slot must be a non-negative integer (got ${slot}).`);
Expand Down Expand Up @@ -144,6 +198,16 @@ export class TransformBuffer {
this._count = slot + 1;
}

// Track the exact written-slot range so a delta upload pushes precisely the
// changed rows — including a slot reused below the high-water mark.
if (this._dirtyMax < this._dirtyMin) {
this._dirtyMin = slot;
this._dirtyMax = slot;
} else {
if (slot < this._dirtyMin) this._dirtyMin = slot;
if (slot > this._dirtyMax) this._dirtyMax = slot;
}

this._frameHash = this._mix(this._frameHash, slot);

for (let i = 0; i < floatsPerSlot; i++) {
Expand Down
7 changes: 5 additions & 2 deletions src/rendering/plan/RenderInstruction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ import type { GroupScope } from './RenderScope';
* names the concept the plan player consumes and that the batching layer
* reorders, independent of how the draw happens to be stored in the scope
* tree. Future {@link TransformBuffer} slotting keys on each instruction's
* stable {@link DrawCommand.nodeIndex} (within the `[0, plan.nodeCount)`
* slot space).
* stable {@link DrawCommand.nodeIndex}. Each index is frame-global —
* `[frameBase, frameBase + plan.nodeCount)` — because the transform buffer
* is frame-scoped and the builder bases node indices at the current buffer
* slot count (`frameBase`) so every plan in the frame occupies distinct
* slots and can batch cross-call.
*
* Batch units (maximal runs of consecutive instructions in a {@link GroupScope}
* sharing GPU pipeline/bind state) are not materialized: the plan player walks
Expand Down
8 changes: 6 additions & 2 deletions src/rendering/plan/RenderPlanBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,11 @@ export class RenderPlanBuilder {
this._barrierEntryPoolCursor = 0;
this._scopeStack.length = 0;
this._hasPending = false;
this._nodeIndex = 0;
// Base this plan's node indices after whatever earlier render() calls already
// wrote into the frame-scoped transform buffer, so every draw across all
// render() calls in the frame references a distinct slot and can batch.
const frameBase = (backend as { transformBufferCount?: number }).transformBufferCount ?? 0;
this._nodeIndex = frameBase;

const rootScope = this._acquireGroupScope(false);

Expand All @@ -110,7 +114,7 @@ export class RenderPlanBuilder {
});
}

this._plan.nodeCount = this._nodeIndex;
this._plan.nodeCount = this._nodeIndex - frameBase;

return this._plan;
}
Expand Down
61 changes: 52 additions & 9 deletions src/rendering/webgl2/WebGl2Backend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ export class WebGl2Backend implements RenderBackend {
private _transformTextureCount = -1;
private _activeDrawCommand: DrawCommand | null = null;
private _drawPlanDepth = 0;
private readonly _planBaseStack: number[] = [];
private readonly _planHashStack: number[] = [];

public constructor(app: Application) {
const canvasOptions = app.options.canvas ?? {};
Expand Down Expand Up @@ -279,13 +281,27 @@ export class WebGl2Backend implements RenderBackend {

/**
 * Reset the render statistics and start a fresh transform-buffer frame.
 *
 * @returns This backend, for chaining.
 */
public resetStats(): this {
resetRenderStats(this._stats);
// The transform buffer is frame-scoped, so it is restarted here — intended to
// run once per frame — instead of in _beginDrawPlan, where it used to be
// restarted on every render() call.
this._transformBuffer.begin();

return this;
}

/**
 * Current slot count of the transform buffer, exposed as the frame-global slot
 * base the plan builder indexes from.
 * @internal
 */
public get transformBufferCount(): number {
return this._transformBuffer.count;
}

/** @internal */
public _beginDrawPlan(nodeCount: number): void {
this._transformBuffer.begin(nodeCount);
public _beginDrawPlan(_nodeCount: number): void {
// Do NOT reset the transform buffer here — it is frame-scoped (reset in
// resetStats). The builder already based this plan's node indices at the
// current buffer count, so writes land in fresh frame-global slots and
// batches survive across render() calls. Remember this plan's base so a
// nested plan can free its rows on end.
this._planBaseStack.push(this._transformBuffer.count);
this._planHashStack.push(this._transformBuffer.frameHash);
this._activeDrawCommand = null;
this._drawPlanDepth++;
}
Expand Down Expand Up @@ -395,13 +411,23 @@ export class WebGl2Backend implements RenderBackend {
public _endDrawPlan(): void {
this._activeDrawCommand = null;

const planBase = this._planBaseStack.pop() ?? 0;
const planHash = this._planHashStack.pop() ?? 0;

if (this._drawPlanDepth > 0) {
this._drawPlanDepth--;
}

// Only assert balance at the outermost plan: cacheAsBitmap draws a cache
// sprite via a nested render(), whose inner _endDrawPlan sees the still-open
// outer clips — those are not leaks.
// A nested plan (filter / cacheAsBitmap) just ended: flush its draws, then
// free its transform rows so the frame-scoped buffer only grows with
// top-level render() calls. Top-level plans (depth back to 0) keep their rows
// so cross-call batching survives to the frame-end flush.
if (this._drawPlanDepth > 0) {
this._flushActiveRenderer();
this._transformBuffer.rewindTo(planBase, planHash);
}

// Only assert balance at the outermost plan.
if (this._drawPlanDepth === 0) {
this._assertBalancedStencil();
}
Expand Down Expand Up @@ -715,7 +741,12 @@ export class WebGl2Backend implements RenderBackend {
}

public setView(view: View | null): this {
this._flushActiveRenderer();
// Only flush the open batch when the view actually changes. The unconditional
// flush forced one draw call per render() call (each render() re-applies the
// same camera view), defeating cross-call batching.
if (this._renderTarget.view !== view) {
this._flushActiveRenderer();
}
this._renderTarget.setView(view);
this._bindRenderTarget(this._renderTarget);

Expand Down Expand Up @@ -804,11 +835,23 @@ export class WebGl2Backend implements RenderBackend {
throw new Error('Transform texture must be initialized before binding.');
}

// A skipped flush (all three guards false) leaves the dirty range uncleared
// until the next begin(). Safe: every write() mixes its slot into _frameHash,
// so a non-empty dirty range always coincides with snapshot.changed = true —
// the upload branch is always taken before any dirty rows could be stale.
if (snapshot.changed || snapshot.count !== this._transformTextureCount || snapshot.hash !== this._transformTextureHash) {
nextTransformTexture.commitRect(0, 0, 3, snapshot.count);
this._transformBuffer.recordUpload(snapshot.count);
this._transformTextureHash = snapshot.hash;
// Upload only the rows actually written since the last upload (delta), so
// barrier-heavy frames don't re-upload the whole growing buffer. A reused
// slot below the high-water mark is in the dirty range, so it re-uploads.
const { firstRow, rowCount } = this._transformBuffer.consumeDirtyRange(snapshot.count);

if (rowCount > 0) {
nextTransformTexture.commitRect(0, firstRow, 3, rowCount);
this._transformBuffer.recordUpload(rowCount);
}

this._transformTextureCount = snapshot.count;
this._transformTextureHash = snapshot.hash;
}

return this.bindTexture(nextTransformTexture, unit);
Expand Down
Loading