diff --git a/example/lib/crispness_probe.dart b/example/lib/crispness_probe.dart new file mode 100644 index 0000000..a25af6f --- /dev/null +++ b/example/lib/crispness_probe.dart @@ -0,0 +1,129 @@ +// Crispness probe — verifies renderScale keeps an OSR webview sharp when the view +// is visually SCALED by an ancestor transform (the infinite-canvas zoom case). +// +// A single CefWebView is wrapped in a Transform.scale to mimic a canvas zoom: the +// widget's logical size is unchanged, the transform just magnifies it. Without +// renderScale the OSR buffer stays at 1x-zoom resolution and the transform upscales +// it (blurry); with renderScale = screenDpr * zoom the page re-renders at the +// on-screen pixel density (crisp). +// +// Controls: + / - zoom in/out; R toggle renderScale (crisp) vs none (blurry). +// +// Run: +// FLUTTER_CEF_HOST=<.../cef_host> FLUTTER_CEF_ALLOW_INSECURE_PROFILE=1 \ +// flutter run -d macos -t lib/crispness_probe.dart +import 'package:flutter/material.dart'; +import 'package:flutter_cef/flutter_cef.dart'; + +// A text-heavy page — blur shows up most clearly on small text + thin rules. +const _textHtml = ''' + +

Crispness test

+

The quick brown fox jumps over the lazy dog. 0123456789. +Small text and thin hairlines reveal upscaling blur.

+
+

Zoom in: with renderScale this text re-rasterizes sharp; +without it, the 1x texture is magnified and goes soft.

+ + + + +
Col ACol BCol C
row 11.0crisp
row 22.0edges
+''';
+
+void main() => runApp(const CrispApp());
+
+/// Root widget of the crispness probe: a zoom/crisp toolbar above a single
+/// [CefWebView] that is magnified by a [Transform.scale].
+class CrispApp extends StatefulWidget {
+  const CrispApp({super.key});
+
+  @override
+  State<CrispApp> createState() => _CrispAppState();
+}
+
+/// Holds the probe's zoom factor and the crisp/blurry toggle, and owns the
+/// single [CefWebController] backing the view.
+class _CrispAppState extends State<CrispApp> {
+  // Zoom range and step used by the +/− buttons.
+  static const double _minZoom = 0.5;
+  static const double _maxZoom = 4.0;
+  static const double _zoomStep = 0.25;
+
+  // CefWebView clamps its effective dpr to [0.5, 8.0]; clamp here as well so
+  // the toolbar shows the scale that is actually rendered rather than the
+  // raw screenDpr * zoom product (e.g. 12.00 on a 3x display at 4x zoom).
+  static const double _minRenderScale = 0.5;
+  static const double _maxRenderScale = 8.0;
+
+  final _controller = CefWebController();
+  double _zoom = 1.0;
+  bool _crisp = true;
+
+  // Whether the test HTML has already been injected. Without this guard every
+  // page start re-injects the HTML, and if the injected load itself reports a
+  // page start the view reloads forever (stress_probe guards the same call
+  // with its `_loaded` set).
+  bool _htmlLoaded = false;
+
+  @override
+  void initState() {
+    super.initState();
+    _controller.onPageStarted = (_) {
+      if (_htmlLoaded) return;
+      _htmlLoaded = true;
+      _controller.loadHtmlString(_textHtml);
+    };
+  }
+
+  @override
+  void dispose() {
+    _controller.dispose();
+    super.dispose();
+  }
+
+  /// Moves the zoom by [delta], keeping it within [_minZoom, _maxZoom].
+  void _nudgeZoom(double delta) {
+    setState(() => _zoom = (_zoom + delta).clamp(_minZoom, _maxZoom));
+  }
+
+  @override
+  Widget build(BuildContext context) {
+    final screenDpr = MediaQuery.maybeOf(context)?.devicePixelRatio ?? 2.0;
+    // The whole point: when crisp, render at screenDpr*zoom so the buffer has enough
+    // pixels for the transform's magnification; otherwise leave it at screenDpr (blurry).
+    final double? renderScale = _crisp
+        ? (screenDpr * _zoom).clamp(_minRenderScale, _maxRenderScale)
+        : null;
+    return MaterialApp(
+      debugShowCheckedModeBanner: false,
+      home: Scaffold(
+        backgroundColor: const Color(0xFF202733),
+        body: Column(
+          children: [
+            Container(
+              width: double.infinity,
+              color: const Color(0xFF0B1220),
+              padding: const EdgeInsets.symmetric(horizontal: 12, vertical: 8),
+              child: Row(
+                children: [
+                  Expanded(
+                    child: Text(
+                      'zoom=${_zoom.toStringAsFixed(2)} '
+                      'renderScale=${renderScale?.toStringAsFixed(2) ?? "OFF (blurry)"}',
+                      style: const TextStyle(color: Colors.white),
+                    ),
+                  ),
+                  _btn('−', () => _nudgeZoom(-_zoomStep)),
+                  const SizedBox(width: 6),
+                  _btn('+', () => _nudgeZoom(_zoomStep)),
+                  const SizedBox(width: 14),
+                  _btn(
+                    _crisp ? 'crisp ✓' : 'blurry',
+                    () => setState(() => _crisp = !_crisp),
+                    wide: true,
+                  ),
+                ],
+              ),
+            ),
+            Expanded(
+              child: Center(
+                // Fixed logical size, magnified by Transform.scale — exactly the
+                // infinite-canvas case where `size` doesn't change on zoom.
+                child: Transform.scale(
+                  scale: _zoom,
+                  child: SizedBox(
+                    width: 360,
+                    height: 300,
+                    child: CefWebView(
+                      url: 'about:blank',
+                      controller: _controller,
+                      renderScale: renderScale,
+                    ),
+                  ),
+                ),
+              ),
+            ),
+          ],
+        ),
+      ),
+    );
+  }
+
+  /// Builds a compact toolbar button; [wide] reserves room for the longer
+  /// crisp/blurry label.
+  Widget _btn(String label, VoidCallback onTap, {bool wide = false}) =>
+      ElevatedButton(
+        onPressed: onTap,
+        style: ElevatedButton.styleFrom(
+          minimumSize: Size(wide ? 90 : 44, 36),
+          padding: const EdgeInsets.symmetric(horizontal: 8),
+        ),
+        child: Text(label),
+      );
+}
diff --git a/example/lib/stress_probe.dart b/example/lib/stress_probe.dart
new file mode 100644
index 0000000..5218c44
--- /dev/null
+++ b/example/lib/stress_probe.dart
@@ -0,0 +1,456 @@
+// Performance stress probe — many concurrent CefWebViews, for measuring render
+// smoothness, memory, process count, and fd footprint at scale.
+//
+// Mounts a grid of N CefWebViews each loading a continuously-animating page
+// (CSS animation + rAF counter -> continuous OnAcceleratedPaint), reports Flutter
+// frame timing (avg / p90 / jank%) every 2s to stdout + /tmp/cef_stress.jsonl,
+// and offers +/- view and churn (create+dispose loop) controls. Pair with
+// test/perf_sample.sh to sample `pgrep cef_host` / RSS / fd over the run.
+//
+// Profile knob: kProfile='stress' (shared host — the engine multi-view path) vs
+// null (ephemeral — one cef_host per view, the process-blowup baseline).
+//
+// Run:
+//   FLUTTER_CEF_HOST=<.../cef_host> FLUTTER_CEF_ALLOW_INSECURE_PROFILE=1 \
+//     flutter run -d macos -t lib/stress_probe.dart
+import 'dart:async';
+import 'dart:convert';
+import 'dart:io';
+
+import 'package:flutter/material.dart';
+import 'package:flutter/scheduler.dart';
+import 'package:flutter_cef/flutter_cef.dart';
+
+// ── knobs ──────────────────────────────────────────────────────────────────
+// --dart-define=CEF_EPHEMERAL=true => null profile (one cef_host per view, the
+// process-blowup baseline); default => shared host (the engine multi-view path).
+const String? kProfile =
+    bool.fromEnvironment('CEF_EPHEMERAL') ? null : 'stress';
+// Bounded host pool: bucket views across kPoolSize profiles (~kInitialViews/kPoolSize
+// browsers per shared cef_host / GPU process) so no single GPU/Viz process is asked
+// to drive too many accelerated OSR browsers at once (which leaves some never
+// painting). kPoolSize=1 reproduces the single-host blank-tile bug.
+// Expected to be >= 1: it is the modulus used when bucketing view ids into profiles.
+const int kPoolSize = int.fromEnvironment('CEF_POOL', defaultValue: 4);
+// --dart-define=CEF_INITIAL=N => number of views created at startup (and re-created
+// on each create-all phase of the churn cycle).
+const int kInitialViews = int.fromEnvironment('CEF_INITIAL', defaultValue: 12);
+// --dart-define=CEF_STATIC=true => load a STATIC page (paints once, no rAF/CSS
+// animation) instead of the 60fps gradient — models real (mostly static) agent_ui
+// content vs the continuous-animation worst case, to isolate sustained-compositing
+// load from the create-time first-frame race.
+const bool kStatic = bool.fromEnvironment('CEF_STATIC');
+// --dart-define=CEF_CREATE_DELAY_MS=1500 => bring the initial views up GRADUALLY
+// (one every N ms, like opening browser windows by hand) instead of all-at-once.
+// Tests whether the never-paint stall is a create-burst surface-handshake race.
+const int kCreateDelayMs =
+    int.fromEnvironment('CEF_CREATE_DELAY_MS', defaultValue: 0);
+// --dart-define=CEF_TILE_PX=140 => render each view at a FIXED small size (in a
+// wrap) instead of the full-window grid. Separates a fill-rate / GPU-bandwidth
+// limit (small tiles => more fit) from a per-browser sink-COUNT cap (fixed ~8
+// regardless of size).
+const int kTilePx = int.fromEnvironment('CEF_TILE_PX', defaultValue: 0);
+// --dart-define=CEF_FORCE_REPAINT=true => drive a 60fps Flutter repaint (setState)
+// so the Texture widgets are pulled every frame. Tests whether the "static" display
+// is just idle Flutter not pulling produced frames (textureFrameAvailable not
+// scheduling a frame), independent of GPU production.
+const bool kForceRepaint = bool.fromEnvironment('CEF_FORCE_REPAINT');
+// --dart-define=CEF_RECREATE_ON_STALL=true => when a tile's watchdog reports it never
+// produced a first frame (onPaintStalled), dispose + recreate it (a fresh browser +
+// capturer). Self-heals the intermittent OSR capturer-establishment failure.
+const bool kRecreateOnStall = bool.fromEnvironment('CEF_RECREATE_ON_STALL');
+// Max destructive recreates per tile before falling back to pump-patience (never churn).
+const int kMaxRecreates = int.fromEnvironment('CEF_MAX_RECREATES', defaultValue: 2);
+// --dart-define=CEF_LIVE_CAP=6 => MASKING approach: keep only N tiles CEF-visible
+// (setVisible(true)) at a time; the rest are setVisible(false) (WasHidden → capturer
+// idle, frozen on last frame), rotating so every tile gets a live turn to establish.
+// Models the Campus "only live-render the most-relevant ~6 webviews" policy.
+const int kLiveCap = int.fromEnvironment('CEF_LIVE_CAP', defaultValue: 0);
+// --dart-define=CEF_ANIM_DELAY_MS=1000 => animated content that stays STATIC for the
+// first N ms after load, THEN starts animating (rAF + CSS). Tests whether the blank is
+// an animation-DURING-establishment race: if all establish their first frame while
+// static, then start animating, the fix is "establish before animating".
+const int kAnimDelayMs = int.fromEnvironment('CEF_ANIM_DELAY_MS', defaultValue: 0);
+// --dart-define=CEF_REVEAL_MS=2000 => with cef_host FLUTTER_CEF_BORN_HIDDEN=1, reveal
+// tiles ONE AT A TIME (setVisible(true)) every N ms so first-frame establishment is
+// serialized (one concurrent first-frame allocation), the Chrome background-tab model.
+const int kRevealMs = int.fromEnvironment('CEF_REVEAL_MS', defaultValue: 0);
+// --dart-define=CEF_REAL_URLS=true => load REAL websites (heavy JS/WebGL/video, real
+// network + first-paint timing) instead of the synthetic anim HTML, cycling the list
+// below across the tiles. The hardest stress: real establishment latency + real GPU load.
+const bool kRealUrls = bool.fromEnvironment('CEF_REAL_URLS');
+// Sites cycled across tiles in CEF_REAL_URLS mode (picked by view id modulo the
+// list length).
+const List _realUrls = [
+  'https://bruno-simon.com', // WebGL 3D driving-game portfolio (brutal)
+  'https://www.shadertoy.com', // GPU fragment shaders
+  'https://webglsamples.org/aquarium/aquarium.html', // animated WebGL aquarium
+  'https://threejs.org', // WebGL
+  'https://earth.google.com/web', // 3D globe (very heavy)
+  'https://www.google.com/maps', // WebGL maps
+  'https://www.windy.com', // animated WebGL weather maps
+  'https://www.youtube.com', // video grid
+  'https://www.twitch.tv', // live video
+  'https://playcanvas.com', // WebGL 3D engine demos
+  'https://pixijs.com', // WebGL 2D
+  'https://www.apple.com/macbook-pro/', // scroll-driven video
+  'https://www.nytimes.com', // heavy media/ads
+  'https://www.reddit.com', // infinite scroll + media
+  'https://www.amazon.com', // heavy commerce
+  'https://www.airbnb.com', // maps + image grids
+  'https://codepen.io/trending', // live code demos
+  'https://www.tradingview.com/chart/', // live charts
+  'https://www.flightradar24.com', // live animated map (moving planes)
+  'https://www.spotify.com', // web player landing
+];
+// Number of views the +/- toolbar buttons add or remove per press.
+const int kStep = 4;
+// --dart-define=CEF_CHURN=true => oscillate create-all / dispose-all every 12s,
+// to leak-test create/dispose reclamation (procs/RSS/FD must return to baseline).
+const bool kChurn = bool.fromEnvironment('CEF_CHURN');
+// JSONL file the periodic stats report is appended to (one JSON object per line).
+const String _statsPath = '/tmp/cef_stress.jsonl';
+
+const _animHtml = '''
+
+
fps …
+'''; + +// Animated page that holds STATIC for [delayMs] after load, then starts the CSS +// animation + rAF loop. Models "establish the first frame before animating". +String _delayedAnimHtml(int delayMs) => ''' + +
warming…
+'''; + +// Static page: a gradient + a box, painted ONCE — no CSS animation, no rAF, no +// timers, so the compositor produces one frame then idles (models static content). +const _staticHtml = ''' + +
+
static
''';
+
+void main() => runApp(const StressApp());
+
+/// Root widget of the stress probe: a stats/controls bar above a grid (or a
+/// fixed-size wrap) of [CefWebView] tiles.
+class StressApp extends StatefulWidget {
+  const StressApp({super.key});
+
+  @override
+  State<StressApp> createState() => _StressAppState();
+}
+
+/// Owns the tile controllers, the experiment timers selected by the knobs,
+/// and the rolling frame-timing window that is reported every 2s.
+class _StressAppState extends State<StressApp> {
+  final List<CefWebController> _controllers = [];
+  // View ids whose synthetic HTML has already been injected (one-shot guard).
+  final Set<int> _loaded = {};
+  int _nextId = 0;
+
+  // rolling frame-timing window (microseconds of total span per frame).
+  final List<int> _frameMicros = [];
+  String _stats = 'warming up…';
+  // Started when the State is constructed. (A `late final` initializer runs
+  // lazily on first read, which would start the clock at the first report
+  // rather than at startup and skew every `tMs`.)
+  final Stopwatch _sw = Stopwatch()..start();
+
+  // Every timer this State starts, so dispose() can cancel them all; an
+  // uncancelled timer would keep calling setState / touching disposed
+  // controllers after the widget is gone.
+  final List<Timer> _timers = [];
+
+  // Index of the first CEF-visible tile in the rotating live-cap window.
+  int _liveStart = 0;
+
+  // Per-controller bookkeeping keyed by CefWebController.sessionId.
+  // NOTE(review): sessionId's declared type is not visible in this listing
+  // (type arguments were lost), so the key is typed Object — tighten once
+  // confirmed.
+  final Map<Object, String> _urlBySession = {};
+  final Map<Object, int> _recreateCount = {};
+
+  /// Runs [tick] every [period] until it cancels itself or the State is
+  /// disposed.
+  void _every(Duration period, void Function(Timer timer) tick) {
+    _timers.add(Timer.periodic(period, (t) {
+      if (!mounted) {
+        t.cancel();
+        return;
+      }
+      tick(t);
+    }));
+  }
+
+  /// Runs [action] once after [delay], unless the State was disposed first.
+  void _after(Duration delay, void Function() action) {
+    _timers.add(Timer(delay, () {
+      if (mounted) action();
+    }));
+  }
+
+  @override
+  void initState() {
+    super.initState();
+    _startInitialViews();
+    _startVisibilityPolicy();
+    SchedulerBinding.instance.addTimingsCallback(_onTimings);
+    _every(const Duration(seconds: 2), (_) => _emit());
+    if (kForceRepaint) {
+      // Force a Flutter frame ~60fps so the Texture widgets get pulled every frame.
+      _every(const Duration(milliseconds: 16), (_) => setState(() {}));
+    }
+    if (kChurn) {
+      _every(const Duration(seconds: 12), (_) {
+        if (_controllers.isEmpty) {
+          for (var i = 0; i < kInitialViews; i++) {
+            _add();
+          }
+        } else {
+          while (_controllers.isNotEmpty) {
+            _remove();
+          }
+        }
+      });
+    }
+  }
+
+  /// Creates the initial views, all at once or one every [kCreateDelayMs].
+  void _startInitialViews() {
+    if (kCreateDelayMs <= 0) {
+      for (var i = 0; i < kInitialViews; i++) {
+        _add();
+      }
+      return;
+    }
+    // Gradual bring-up: one view every kCreateDelayMs, mimicking a human opening
+    // windows one at a time (never a 12-at-once create burst).
+    var created = 0;
+    void createNext() {
+      _add();
+      created++;
+    }
+
+    if (kInitialViews > 0) createNext();
+    _every(Duration(milliseconds: kCreateDelayMs), (t) {
+      if (created >= kInitialViews) {
+        t.cancel();
+        return;
+      }
+      createNext();
+    });
+  }
+
+  /// Starts the serial-reveal or live-cap visibility experiment, if enabled.
+  void _startVisibilityPolicy() {
+    if (kRevealMs > 0) {
+      // Serial reveal: ensure all hidden, then show one every kRevealMs (with cef_host
+      // born-hidden, each establishes alone against an already-steady set).
+      _after(const Duration(milliseconds: 600), () {
+        for (final c in _controllers) {
+          c.setVisible(false);
+        }
+        var shown = 0;
+        _every(Duration(milliseconds: kRevealMs), (t) {
+          if (shown >= _controllers.length) {
+            t.cancel();
+            return;
+          }
+          _controllers[shown].setVisible(true);
+          shown++;
+        });
+      });
+    } else if (kLiveCap > 0) {
+      // Live-cap masking: after browsers come up, keep only kLiveCap visible at a time
+      // and rotate; reapply visibility on a cadence so each tile gets a live turn to
+      // paint.
+      _after(const Duration(milliseconds: 800), _applyLiveCap);
+      _every(const Duration(milliseconds: 2500), (_) {
+        final n = _controllers.length;
+        if (n == 0) return;
+        // Rotate over the tiles that actually exist: the count differs from
+        // kInitialViews after +/-, churn, or during gradual bring-up.
+        _liveStart = (_liveStart + kLiveCap) % n;
+        _applyLiveCap();
+      });
+    }
+  }
+
+  /// Frame-timings callback: appends each frame's total span (µs) to the
+  /// rolling window, keeping at most the latest 600 samples.
+  void _onTimings(List<FrameTiming> timings) {
+    for (final t in timings) {
+      _frameMicros.add(t.totalSpan.inMicroseconds);
+    }
+    if (_frameMicros.length > 600) {
+      _frameMicros.removeRange(0, _frameMicros.length - 600);
+    }
+  }
+
+  /// Summarises the frames collected since the last report (avg / p90 / p99 /
+  /// jank%), prints the row, appends it to [_statsPath], and resets the window.
+  void _emit() {
+    if (_frameMicros.isEmpty) return;
+    final xs = List<int>.of(_frameMicros)..sort();
+    // Value at quantile [q] of the sorted samples, in milliseconds.
+    double quantileMs(double q) =>
+        xs[(xs.length * q).floor().clamp(0, xs.length - 1)] / 1000.0;
+    final avg = xs.reduce((a, b) => a + b) / xs.length / 1000.0;
+    final p90 = quantileMs(0.90);
+    final p99 = quantileMs(0.99);
+    // jank = frames slower than 1.5x a 60Hz budget (~25ms).
+    final jank = xs.where((m) => m > 25000).length / xs.length * 100;
+    final row = <String, Object>{
+      'tMs': _sw.elapsedMilliseconds,
+      'views': _controllers.length,
+      'profile': kProfile ?? 'ephemeral',
+      'avgMs': double.parse(avg.toStringAsFixed(2)),
+      'p90Ms': double.parse(p90.toStringAsFixed(2)),
+      'p99Ms': double.parse(p99.toStringAsFixed(2)),
+      'jankPct': double.parse(jank.toStringAsFixed(1)),
+    };
+    final line = jsonEncode(row);
+    // ignore: avoid_print
+    print('CEF_STRESS $line');
+    try {
+      File(_statsPath).writeAsStringSync('$line\n', mode: FileMode.append);
+    } catch (_) {
+      // Deliberately best-effort: stdout already carries the row, and an
+      // unwritable stats file must not abort the run.
+    }
+    _frameMicros.clear();
+    if (mounted) {
+      setState(() => _stats =
+          'views=${_controllers.length} avg=${row['avgMs']}ms p90=${row['p90Ms']}ms jank=${row['jankPct']}%');
+    }
+  }
+
+  // Apply the rotating live window: the kLiveCap tiles starting at _liveStart
+  // (wrapping around) are CEF-visible (live), all others hidden (frozen on last
+  // frame). Re-applied on a timer.
+  void _applyLiveCap() {
+    final n = _controllers.length;
+    if (n == 0) return;
+    // The list may have shrunk since _liveStart was last advanced.
+    final start = _liveStart % n;
+    for (var i = 0; i < n; i++) {
+      final inWindow = ((i - start + n) % n) < kLiveCap;
+      _controllers[i].setVisible(inWindow);
+    }
+  }
+
+  /// Profile name for view [id]: null in ephemeral mode, otherwise one of the
+  /// pooled `<kProfile>-<bucket>` names.
+  String? _poolProfile(int id) {
+    final base = kProfile;
+    if (base == null) return null;
+    // A pool size below 1 (e.g. CEF_POOL=0) would make the modulo throw;
+    // treat it as a single shared host.
+    final buckets = kPoolSize < 1 ? 1 : kPoolSize;
+    return '$base-${id % buckets}';
+  }
+
+  /// Creates a controller for a new tile and wires its load/stall logging,
+  /// the one-shot synthetic-HTML injection, and the bounded recreate-on-stall.
+  CefWebController _makeController() {
+    final id = _nextId++;
+    final c = CefWebController(profile: _poolProfile(id));
+    if (kRealUrls) _urlBySession[c.sessionId] = _realUrls[id % _realUrls.length];
+    String tag(String u) => u.length > 22 ? u.substring(0, 22) : u;
+    c.onPageStarted = (url) {
+      // ignore: avoid_print
+      print('CEF_STRESS_LOAD view=$id pageStarted ${tag(url)}');
+      // Real-URL mode: let the real page load (no synthetic HTML injection).
+      if (!kRealUrls && _loaded.add(id)) {
+        c.loadHtmlString(kStatic
+            ? _staticHtml
+            : kAnimDelayMs > 0
+                ? _delayedAnimHtml(kAnimDelayMs)
+                : _animHtml);
+      }
+    };
+    c.onPageFinished = (url) {
+      // ignore: avoid_print
+      print('CEF_STRESS_LOAD view=$id pageFinished ${tag(url)}');
+    };
+    c.onLoadError = (e) {
+      // ignore: avoid_print
+      print('CEF_STRESS_LOAD view=$id loadError ${e.errorCode} ${tag(e.url)}');
+    };
+    c.onPaintStalled = () {
+      final attempts = _recreateCount[c.sessionId] ?? 0;
+      // ignore: avoid_print
+      print('CEF_STRESS_RECREATE view=$id stalled (attempt $attempts)');
+      if (!kRecreateOnStall) return; // patience-only mode: rely on the pump
+      // Bounded, backed-off recreate. The paintStalled signal REPEATS while blank, so we
+      // gate on a per-tile attempt count: recreate is destructive (restarts the page
+      // load), so cap it — a still-loading heavy page paints on its own (patience); only a
+      // genuinely-stuck tile needs the (serialized → low-contention → succeeds) recreate.
+      if (attempts >= kMaxRecreates) return; // exhausted → leave it to the pump, never churn
+      if (!mounted) return; // a late stall signal must not setState after dispose
+      final idx = _controllers.indexOf(c);
+      if (idx < 0) return;
+      final nc = _makeController();
+      _recreateCount[nc.sessionId] = attempts + 1; // carry the count to the replacement
+      // Keep the tile on the same site: the replacement got a fresh id, which
+      // would otherwise pick a different entry of _realUrls.
+      final url = _urlBySession[c.sessionId];
+      if (url != null) _urlBySession[nc.sessionId] = url;
+      setState(() => _controllers[idx] = nc);
+      _retire(c);
+    };
+    return c;
+  }
+
+  /// Drops [c]'s bookkeeping and disposes it, so the per-session maps do not
+  /// grow without bound across churn cycles and recreates.
+  void _retire(CefWebController c) {
+    _urlBySession.remove(c.sessionId);
+    _recreateCount.remove(c.sessionId);
+    c.dispose();
+  }
+
+  /// Appends one new tile.
+  void _add() {
+    if (!mounted) return;
+    setState(() => _controllers.add(_makeController()));
+  }
+
+  /// Removes and disposes the most recently added tile, if any.
+  void _remove() {
+    if (_controllers.isEmpty) return;
+    final c = _controllers.removeLast();
+    if (mounted) setState(() {});
+    _retire(c);
+  }
+
+  @override
+  void dispose() {
+    for (final t in _timers) {
+      t.cancel();
+    }
+    _timers.clear();
+    SchedulerBinding.instance.removeTimingsCallback(_onTimings);
+    for (final c in _controllers) {
+      c.dispose();
+    }
+    super.dispose();
+  }
+
+  /// Grid column count for [n] tiles: 1, 2, 3, 4, then 5 beyond 16 tiles.
+  int _columnsFor(int n) {
+    if (n <= 1) return 1;
+    if (n <= 4) return 2;
+    if (n <= 9) return 3;
+    if (n <= 16) return 4;
+    return 5;
+  }
+
+  /// One padded web-view tile for [c]; keyed by session so a recreated
+  /// controller gets a fresh widget state.
+  Widget _tile(CefWebController c) => Padding(
+        padding: const EdgeInsets.all(2),
+        child: CefWebView(
+            key: ValueKey(c.sessionId),
+            url: _urlBySession[c.sessionId] ?? 'about:blank',
+            controller: c,
+            profile: c.profile),
+      );
+
+  @override
+  Widget build(BuildContext context) {
+    final cols = _columnsFor(_controllers.length);
+    return MaterialApp(
+      debugShowCheckedModeBanner: false,
+      home: Scaffold(
+        body: Column(
+          children: [
+            Container(
+              color: const Color(0xFF0B1220),
+              padding: const EdgeInsets.all(8),
+              child: Row(
+                children: [
+                  Expanded(
+                    child: Text('stress — $_stats',
+                        style: const TextStyle(
+                            color: Colors.white,
+                            fontWeight: FontWeight.w600)),
+                  ),
+                  TextButton(
+                      onPressed: () {
+                        for (var i = 0; i < kStep; i++) {
+                          _add();
+                        }
+                      },
+                      child: const Text('+$kStep')),
+                  TextButton(
+                      onPressed: () {
+                        for (var i = 0; i < kStep; i++) {
+                          _remove();
+                        }
+                      },
+                      child: const Text('-$kStep')),
+                ],
+              ),
+            ),
+            Expanded(
+              child: kTilePx > 0
+                  ? SingleChildScrollView(
+                      child: Wrap(
+                        children: [
+                          for (final c in _controllers)
+                            SizedBox(
+                              width: kTilePx.toDouble(),
+                              height: kTilePx.toDouble(),
+                              child: _tile(c),
+                            ),
+                        ],
+                      ),
+                    )
+                  : GridView.count(
+                      crossAxisCount: cols,
+                      children: [
+                        for (final c in _controllers) _tile(c),
+                      ],
+                    ),
+            ),
+          ],
+        ),
+      ),
+    );
+  }
+}
diff --git a/lib/src/cef_web_controller.dart b/lib/src/cef_web_controller.dart
index 8f1220a..d2e34cf 100644
--- a/lib/src/cef_web_controller.dart
+++ b/lib/src/cef_web_controller.dart
@@ -109,10 +109,16 @@ class CefWebController {
   /// `"crashed"` for a generic process death.
   void Function(String reason)? onProcessGone;
 
-  /// C1: the browser was created but never painted its first frame, even after the
-  /// native host re-kicked a repaint. The texture is (still) blank with no other
-  /// signal — the consumer can use this to recover (e.g. recreate the view) rather
-  /// than leaving a permanently blank tile.
+ /// The browser was created but still hasn't painted its first frame after the + /// native host's grace window (~10s, env-tunable via `FLUTTER_CEF_FIRSTPAINT_MS`), + /// so the texture is (still) blank. The consumer can recover by recreating the view. + /// + /// REPEATING signal: this fires again roughly every grace window for as long as the + /// view stays blank, and stops only once it paints (or the controller is disposed). + /// So any DESTRUCTIVE recovery (recreate) MUST be bounded/debounced — keep a per-view + /// attempt counter or backoff rather than recreating on every call (recreating a + /// merely-slow heavy page on each tick just restarts its load and churns). See + /// `example/lib/stress_probe.dart` (`_recreateCount` / `kMaxRecreates`) for the pattern. VoidCallback? onPaintStalled; /// The caret rect (view-local logical px) of the active IME composition. diff --git a/lib/src/cef_web_view.dart b/lib/src/cef_web_view.dart index cf852f2..e6b5442 100644 --- a/lib/src/cef_web_view.dart +++ b/lib/src/cef_web_view.dart @@ -55,6 +55,7 @@ class CefWebView extends StatefulWidget { this.enableCdp = false, this.agentControl = false, this.profile, + this.renderScale, }) : assert(!(enableCdp && !agentControl && profile != null && profile != ''), 'enableCdp cannot be combined with a named profile: CDP-over-TCP ' 'exposes an unauthenticated localhost port that could read the ' @@ -121,6 +122,19 @@ class CefWebView extends StatefulWidget { /// [enableCdp] (open port), but compatible with [agentControl] (private pipe). final String? profile; + /// Device-pixel-ratio the page renders at, overriding the screen dpr from + /// [MediaQuery]. Set this for crispness when the view is visually SCALED by an + /// ancestor transform rather than relaid out — e.g. an infinite-canvas zoom that + /// applies `Transform.scale`: the widget's logical size is unchanged, so without + /// this the OSR buffer stays at 1×-zoom resolution and the transform upscales it + /// (blurry). 
Pass `screenDpr × zoom` to render at the on-screen pixel density. + /// + /// The OSR buffer is `logicalSize × renderScale` pixels, so cost/memory grow with + /// the square — the consumer should clamp it (and quantize/debounce zoom so it + /// doesn't re-render every frame of a pinch). The native side guards `dpr ≤ 8`. + /// Null (default) uses `MediaQuery.devicePixelRatio`. + final double? renderScale; + @override State createState() => _CefWebViewState(); } @@ -133,6 +147,7 @@ class _CefWebViewState extends State FocusNode? _ownFocusNode; int? _textureId; Size? _lastSize; + double? _lastDpr; bool _creating = false; // ── IME / text input ───────────────────────────────────────────── @@ -207,7 +222,11 @@ class _CefWebViewState extends State // controller so we don't read a deactivated MediaQuery or resize a // torn-down session. if (!mounted) return; - final dpr = MediaQuery.maybeOf(context)?.devicePixelRatio ?? 1.0; + // Effective render dpr: an explicit [renderScale] (e.g. screenDpr × canvas zoom, + // for crispness when an ancestor transform scales the view) overrides the screen + // dpr from MediaQuery. Clamped to the native guard range (dpr ≤ 8). + final dpr = (widget.renderScale ?? MediaQuery.maybeOf(context)?.devicePixelRatio ?? 1.0) + .clamp(0.5, 8.0); final w = size.width.round(); final h = size.height.round(); if (w <= 0 || h <= 0) return; @@ -233,11 +252,13 @@ class _CefWebViewState extends State } return; } - if (_textureId != null && _lastSize != size) { + if (_textureId != null && (_lastSize != size || _lastDpr != dpr)) { _lastSize = size; - // Resize on every layout change. The native session (CefWebSession) flow-controls the - // sends to cef_host's paint rate — it keeps one resize in flight and coalesces to the - // latest size — so the page reflows live during the drag without us pacing here. + _lastDpr = dpr; + // Resize on every layout OR dpr change (the latter = a zoom that re-renders the page + // at a higher pixel density for crispness). 
The native session (CefWebSession) + // flow-controls the sends to cef_host's paint rate — it keeps one resize in flight and + // coalesces to the latest — so the page reflows/re-rasterizes live without us pacing. _controller.resize(w, h, dpr: dpr); } } diff --git a/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift b/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift index 2359d88..dbe7b0b 100644 --- a/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift +++ b/packages/flutter_cef_macos/macos/Classes/CefProfileHost.swift @@ -128,17 +128,37 @@ final class CefProfileHost { private var createEnqueued: Set = [] // browserIds whose create has been sent // Per-host create pacing (guarded by writeLock). A BURST of opCreateBrowser frames - // would otherwise make cef_host run a pile of browser creates concurrently, contending - // the one shared GPU/Viz accelerated-surface handshake so later browsers get NO surface - // and never paint. So we send creates ONE AT A TIME and advance only when cef_host acks - // the create (H3: opCreated, off OnAfterCreated) — serialized by COMPLETION, not a - // wall-clock guess. `createAckTimeout` is a backstop so a create that never acks (a - // wedged renderer) can't stall the queue forever. `createInFlight` is the browserId we - // are currently awaiting the ack for. + // would otherwise make cef_host run a pile of browser creates concurrently, each doing + // its first-frame GPU shared-image allocation against the one shared GPU/Viz process at + // the same instant — that allocation RACES and the losers silently Stop() (permanent + // blank tile). PROVEN: 12 animated tiles created concurrently → ~9/12 paint; created + // ONE AT A TIME → 12/12 (and all 12 then animate at 60fps — steady state is fine, only + // concurrent ESTABLISHMENT was the problem). 
So we admit creates through a SLIDING + // WINDOW: at most `maxCreateInFlight` browsers may be establishing (awaiting first paint) + // at once, and we gate each slot's release on that browser's FIRST PAINT + // (firstPresentArrived), NOT the bind ack (opCreated). Window=1 is strict serial. A + // window of K is materially safer than "K all-at-once": only the K still-establishing + // browsers contend the first-frame allocator (established ones just blit from an existing + // surface), and the K creates stagger by create+first-paint latency rather than firing + // simultaneously. `createAckTimeout` is the per-browser paint backstop so a + // bound-but-never-painting browser can't hold its slot forever. `createInFlight` is the + // set of browserIds currently occupying an establishment slot. private var createSendQueue: [(id: UInt32, session: CefWebSession, url: String)] = [] - private var createPacerRunning = false - private var createInFlight: UInt32? - private let createAckTimeout: TimeInterval = 8 + private var createInFlight: Set = [] + private let maxCreateInFlight: Int = { + if let s = ProcessInfo.processInfo.environment["FLUTTER_CEF_ESTAB_WINDOW"], + let n = Int(s), n > 0 { return n } + return 3 // K=3: ~3x faster cascade than strict serial on BOTH median and last-tile + // first-paint for real-site boards (measured: median 36→10s, last 41→21s, + // 20 real sites). The rare all-animation-burst knock-out is caught by the + // watchdog→recreate (never blank). See specs/osr-many-views.md. + }() + private let createAckTimeout: TimeInterval = { + if let s = ProcessInfo.processInfo.environment["FLUTTER_CEF_CREATE_TIMEOUT_MS"], + let ms = Double(s) { return ms / 1000.0 } + return 8 // backstop for a browser that binds but never first-paints; generous so a + // heavy real site that's slow to composite isn't de-serialized prematurely. + }() // C1 first-present watchdog (guarded by presentLock). 
browserIds awaiting their FIRST // opPresent: if none arrives within the deadline we re-kick via opInvalidate, then (if @@ -151,6 +171,12 @@ final class CefProfileHost { // the watchdog must NOT treat that as a stall (work_canvas creates tiles already // off-screen as a normal lazy-spawn pattern). Guarded by presentLock. private var hiddenBrowsers: Set = [] + // At most one live checkFirstPresent chain per browserId. The watchdog re-arms itself + // (repeating paintStalled signal) and noteVisibility re-arms on unhide, so without this + // a hide/show flap of a still-blank tile would accumulate parallel chains (each one + // re-kicking + logging + emitting paintStalled every firstPaintGrace forever). Guarded + // by presentLock; cleared when a chain terminates (paint / hidden / dead / dispose). + private var watchdogArmed: Set = [] // Invoked (off the reader thread) when an ad-hoc host refuses to load a named // profile (no creds were written — see F.5). The plugin tears this host down @@ -520,43 +546,50 @@ final class CefProfileHost { pumpCreateQueue() } - /// Send the NEXT queued create and wait for cef_host to ack it (opCreated) before - /// sending the following one (H3) — so browsers create one-at-a-time and each one's - /// render + accelerated-surface handshake completes before the next contends the shared - /// GPU/Viz process. `createAckTimeout` backstops a create that never acks (wedged - /// renderer). A create whose browser was disposed while queued is skipped. + /// Send the NEXT queued create and wait for that browser's FIRST PAINT (firstPresentArrived, + /// off opPresent) before sending the following one — so each browser's first-frame GPU + /// allocation completes before the next one contends, serializing establishment and + /// avoiding the concurrent-first-frame race. `createAckTimeout` backstops a browser that + /// binds but never paints so it can't stall the queue forever. A create whose browser was + /// disposed while queued is skipped. 
private func pumpCreateQueue() { - writeLock.lock() - // H6: never pump on a dead/dying host — the queue was abandoned in - // shutdown()/handleHostDeath(); pumping would sendCreate into a closed pipe and a - // stuck `createPacerRunning` could wedge a reused host. - if !running || crashed || createPacerRunning || createSendQueue.isEmpty { + // Fill the sliding window: dispatch creates while a slot is free. Each dispatched + // browser holds its slot until its first paint (or backstop) releases it via + // advanceCreatePacer, which re-pumps. + while true { + writeLock.lock() + // H6: never pump on a dead/dying host — the queue was abandoned in + // shutdown()/handleHostDeath(); pumping would sendCreate into a closed pipe and a + // stuck slot could wedge a reused host. + if !running || crashed || createInFlight.count >= maxCreateInFlight || + createSendQueue.isEmpty { + writeLock.unlock() + return + } + let next = createSendQueue.removeFirst() + createInFlight.insert(next.id) writeLock.unlock() - return - } - createPacerRunning = true - let next = createSendQueue.removeFirst() - createInFlight = next.id - writeLock.unlock() - browsersLock.lock() - let stillLive = browsers[next.id] != nil - browsersLock.unlock() - guard stillLive else { - // Disposed while queued — drop it and advance. M1: trampoline rather than - // recurse synchronously (a "close all tiles" mid-burst could skip many disposed - // creates and blow the stack). - writeLock.lock(); createPacerRunning = false; createInFlight = nil; writeLock.unlock() - DispatchQueue.global().async { [weak self] in self?.pumpCreateQueue() } - return - } + browsersLock.lock() + let stillLive = browsers[next.id] != nil + browsersLock.unlock() + guard stillLive else { + // Disposed while queued — free the slot and continue filling (no recursion; + // a "close all tiles" mid-burst could skip many disposed creates). 
+ writeLock.lock(); createInFlight.remove(next.id); writeLock.unlock() + continue + } - sendCreate(next.id, next.session, next.url) - armFirstPresentWatchdog(next.id) // C1 - // H3: advance on the create ack (opCreated, via advanceCreatePacer in the reader); - // this timer is only the backstop if that ack never comes. - DispatchQueue.global().asyncAfter(deadline: .now() + createAckTimeout) { [weak self] in - self?.advanceCreatePacer(after: next.id, timedOut: true) + // Arm the watchdog (insert into firstPresentPending) BEFORE sendCreate so a first + // opPresent can never be observed before the id is registered as pending (which would + // leave a healthy painting tile stuck "pending" → false perpetual paintStalled). + armFirstPresentWatchdog(next.id) // C1 + sendCreate(next.id, next.session, next.url) + // Release this slot on the browser's FIRST PAINT (firstPresentArrived, in the + // reader); this timer is only the backstop if it binds but never paints in time. + DispatchQueue.global().asyncAfter(deadline: .now() + createAckTimeout) { [weak self] in + self?.advanceCreatePacer(after: next.id, timedOut: true) + } } } @@ -565,16 +598,15 @@ final class CefProfileHost { /// Idempotent: only the FIRST of {ack, timeout} for the current in-flight id advances. private func advanceCreatePacer(after browserId: UInt32, timedOut: Bool) { writeLock.lock() - guard createInFlight == browserId else { writeLock.unlock(); return } - createInFlight = nil - createPacerRunning = false + // Idempotent: only the FIRST of {first-paint, timeout} for this id frees its slot. 
+ guard createInFlight.remove(browserId) != nil else { writeLock.unlock(); return } writeLock.unlock() if timedOut { - NSLog("[cef] profile '\(profileId)': create-ack timeout for browser \(browserId) — advancing pacer") + NSLog("[cef] profile '\(profileId)': create-ack timeout for browser \(browserId) — freeing establishment slot") } - // Dispatch the next create OFF the reader thread (advanceCreatePacer is called from - // it on opCreated): pumpCreateQueue -> sendCreate writes to the same pipe the reader - // reads, and the reader must never block on a write. + // Refill the freed slot OFF the reader thread (advanceCreatePacer is called from it on + // first paint): pumpCreateQueue -> sendCreate writes to the same pipe the reader reads, + // and the reader must never block on a write. DispatchQueue.global().async { [weak self] in self?.pumpCreateQueue() } } @@ -588,21 +620,65 @@ final class CefProfileHost { // MARK: C1 first-present watchdog - /// Arm the first-present watchdog for a freshly-sent create. If no opPresent arrives - /// within ~3s we re-kick a repaint (opInvalidate); if still blank ~4s later we surface - /// paintStalled so the consumer can recover (recreate) instead of a silent blank tile. + /// Total grace for a browser to deliver its FIRST frame before the watchdog declares it + /// stalled (→ consumer recreates). Cancelled the instant ANY frame arrives, so this only + /// bounds the GENUINELY-blank case — it does NOT slow content that paints quickly. + /// Must be generous: a heavy real site (WebGL, 3D, huge bundle) can take several seconds + /// to composite its first frame, and recreating it just restarts that heavy load (churn). + /// Env-tunable. 
+ private let firstPaintGrace: TimeInterval = { + if let s = ProcessInfo.processInfo.environment["FLUTTER_CEF_FIRSTPAINT_MS"], + let ms = Double(s) { return ms / 1000.0 } + return 10.0 + }() + + /// Arm the first-present watchdog for a freshly-sent create: after `firstPaintGrace` + /// with no frame at all, run a liveness check. private func armFirstPresentWatchdog(_ browserId: UInt32) { - presentLock.lock(); firstPresentPending.insert(browserId); presentLock.unlock() - DispatchQueue.global().asyncAfter(deadline: .now() + 3) { [weak self] in - self?.checkFirstPresent(browserId, phase: 1) + presentLock.lock() + firstPresentPending.insert(browserId) + let already = watchdogArmed.contains(browserId) + if !already { watchdogArmed.insert(browserId) } + presentLock.unlock() + guard !already else { return } // a chain is already live for this id + DispatchQueue.global().asyncAfter(deadline: .now() + firstPaintGrace) { [weak self] in + self?.checkFirstPresent(browserId) } } - /// Reader: a browser painted its first frame — cancel its watchdog. + /// Reader: a browser painted its first frame — cancel its watchdog. (Advancing the + /// create pacer is NOT done here: the pacer advances on a SETTLE delay after first + /// paint — see the reader — because a 1-frame-old browser isn't stably established yet + /// and would be knocked out by the next create's contention.) private func firstPresentArrived(_ browserId: UInt32) { - presentLock.lock(); firstPresentPending.remove(browserId); presentLock.unlock() + presentLock.lock() + firstPresentPending.remove(browserId) + watchdogArmed.remove(browserId) // the chain ends; an unhide may re-arm a fresh one + presentLock.unlock() } + /// How many present frames a browser must deliver before the pacer admits the next + /// create. Gating on the bare first frame advances too eagerly — a 1-frame-old browser + /// gets knocked back out by the next create's first-frame GPU allocation (paints 1-2 + /// frames then stops). 
Requiring a few consecutive frames proves it's stably producing + /// before the next contends. Adaptive + fast: a healthy 60fps tile trips this in a few + /// frames (~tens of ms) vs a fixed time settle. Env-tunable. + private let estabStableFrames: Int = { + if let s = ProcessInfo.processInfo.environment["FLUTTER_CEF_ESTAB_FRAMES"], + let n = Int(s), n > 0 { return n } + return 6 + }() + /// Settle window after a browser's FIRST paint as the OTHER pacer-advance trigger (the + /// pacer advances on stable-frames OR this settle, whichever comes first). The frame + /// threshold is the fast path for continuously-animating content (hits it in ~tens of + /// ms); the settle is the path for STATIC content that paints a short burst on load then + /// idles (a real website) and would never reach the frame threshold. Env-tunable. + private let estabSettle: TimeInterval = { + if let s = ProcessInfo.processInfo.environment["FLUTTER_CEF_ESTAB_SETTLE_MS"], + let ms = Double(s) { return ms / 1000.0 } + return 0.4 + }() + /// C1: track WasHidden state (peeked from opSetVisible). A hidden browser produces no /// frames, so the watchdog suspends rather than flagging it stalled. On UNHIDE, re-arm /// the watchdog for a browser that's still blank, so a genuinely-stuck now-visible tile @@ -612,40 +688,59 @@ final class CefProfileHost { if !visible { hiddenBrowsers.insert(browserId) presentLock.unlock() + // A browser hidden BEFORE its first paint produces no frames (PumpBeginFrame gates + // on slot->visible), so it would never advance the create-pacer via first-paint and + // the watchdog suspends it too — pinning its establishment slot until the backstop. + // A hidden tile isn't contending the first-frame GPU allocator, so it must not count + // against the window: free its slot now (idempotent no-op if it already painted / + // wasn't in flight). This is the dominant case — work_canvas creates tiles off-screen. 
+ advanceCreatePacer(after: browserId, timedOut: false) return } hiddenBrowsers.remove(browserId) - let reArm = firstPresentPending.contains(browserId) + // Re-arm only if still blank AND no chain is already live (dedup across flapping). + let reArm = firstPresentPending.contains(browserId) && !watchdogArmed.contains(browserId) + if reArm { watchdogArmed.insert(browserId) } presentLock.unlock() guard reArm else { return } - DispatchQueue.global().asyncAfter(deadline: .now() + 3) { [weak self] in - self?.checkFirstPresent(browserId, phase: 1) + DispatchQueue.global().asyncAfter(deadline: .now() + firstPaintGrace) { [weak self] in + self?.checkFirstPresent(browserId) } } - private func checkFirstPresent(_ browserId: UInt32, phase: Int) { + /// Liveness check for a browser that hasn't produced its first frame within the grace. + /// PATIENCE, not destruction: the create-pacer serializes establishment so a blank tile + /// is almost always merely SLOW (heavy page, saturated GPU), not dead — and the + /// begin-frame pump keeps running, so it paints on its own once resources free. So we: + /// 1) advance the pacer ONCE (a slow tile must not block the rest of the queue), then + /// 2) send a cheap re-kick and REPORT paintStalled — a REPEATING signal (re-armed each + /// grace while still blank) so the consumer owns recovery policy (e.g. a bounded, + /// backed-off recreate) without this layer ever churning a still-loading page. + /// `firstPresentArrived` (real first frame) removes it from the pending set, ending the + /// loop. Suspended (not retired) while hidden; re-armed on unhide. + private func checkFirstPresent(_ browserId: UInt32) { presentLock.lock() let stillBlank = firstPresentPending.contains(browserId) let hidden = hiddenBrowsers.contains(browserId) - // Don't retire the watch while hidden — a hidden browser is suspended, not stalled; - // noteVisibility re-arms it on unhide if still blank. 
- if stillBlank && !hidden && phase >= 2 { firstPresentPending.remove(browserId) } + // This chain terminates on paint or hide (re-armed fresh on unhide); release the + // single-instance flag so a later unhide can start one new chain. The continuing + // (still-blank, visible) path below keeps it armed by NOT clearing here. + if !stillBlank || hidden { watchdogArmed.remove(browserId) } presentLock.unlock() guard stillBlank else { return } // it painted — nothing to do guard !hidden else { return } // hidden by design — suspended; re-armed on unhide - // Only act while the browser is still live + the host healthy. browsersLock.lock(); let live = browsers[browserId] != nil; browsersLock.unlock() writeLock.lock(); let healthy = running && !crashed; writeLock.unlock() guard live, healthy else { firstPresentArrived(browserId); return } - if phase == 1 { - NSLog("[cef] profile '\(profileId)': browser \(browserId) hasn't painted — re-kicking (opInvalidate)") - send(browserId, Self.opInvalidate, []) - DispatchQueue.global().asyncAfter(deadline: .now() + 4) { [weak self] in - self?.checkFirstPresent(browserId, phase: 2) - } - } else { - NSLog("[cef] profile '\(profileId)': browser \(browserId) never painted — surfacing paintStalled") - onPaintStalled?(browserId) + // Unblock the queue once (idempotent: only the in-flight id advances). + advanceCreatePacer(after: browserId, timedOut: false) + // Cheap nudge (harmless if it's just slow; helps a merely-dropped first frame). + send(browserId, Self.opInvalidate, []) + NSLog("[cef] profile '\(profileId)': browser \(browserId) still blank after \(Int(firstPaintGrace))s — reporting paintStalled (consumer may recreate)") + onPaintStalled?(browserId) + // Re-arm: keep watching on a backoff until it paints (firstPresentArrived clears it). 
+ DispatchQueue.global().asyncAfter(deadline: .now() + firstPaintGrace) { [weak self] in + self?.checkFirstPresent(browserId) } } @@ -715,7 +810,14 @@ final class CefProfileHost { presentLock.lock() firstPresentPending.remove(browserId) hiddenBrowsers.remove(browserId) + watchdogArmed.remove(browserId) presentLock.unlock() + // Free any create-pacer establishment slot this browser still held (disposed before + // first paint) and re-fill the window — otherwise the slot stays pinned until the 8s + // backstop, throttling new creates on this host. Idempotent (no-op if not in flight); + // takes writeLock + re-pumps off-thread, so it must be OUTSIDE all locks here. Mirrors + // the createInFlight.removeAll() that shutdown()/handleHostDeath() already do. + advanceCreatePacer(after: browserId, timedOut: false) return remaining } @@ -738,8 +840,7 @@ final class CefProfileHost { // queued-never-sent sessions don't linger. The browsers map still holds them, so // disposeSession/onHostDied path cleans them up. createSendQueue.removeAll() - createPacerRunning = false - createInFlight = nil + createInFlight.removeAll() writeLock.unlock() // CEF-2a/b: drop ALL relays (each a listener + any client) before tearing down // the pipe, so none keeps bridging into a closing fd. Snapshot under the lock, @@ -948,7 +1049,11 @@ final class CefProfileHost { // not the session. handleTargetId(bid, String(bytes: payload, encoding: .utf8)) } else if op == Self.opCreated { - advanceCreatePacer(after: bid, timedOut: false) // H3: create acked → send the next + // Bind ack only — intentionally does NOT advance the pacer anymore. We gate the + // next create on this browser's first PAINT (firstPresentArrived), not its bind, + // so establishment is serialized. opCreateFailed / the paint-timeout backstop + // still advance for the bound-but-never-painted / failed cases. (No-op here.) 
+ _ = bid } else if op == Self.opCreateFailed { handleCreateFailed(bid) // H7 } else { @@ -957,10 +1062,32 @@ final class CefProfileHost { // C1: detect the FIRST present under the browsersLock we already hold, via a // per-session flag, so the watchdog-cancel (presentLock) fires once per browser // instead of acquiring a second lock on every (up to 60fps) present frame. - let firstPaint = op == Self.opPresent && session != nil && !session!.firstPresentSeen - if firstPaint { session!.firstPresentSeen = true } + var firstPaint = false + var reachedStableFrames = false + if op == Self.opPresent, let s = session { + s.presentCount += 1 + if s.presentCount == 1 { s.firstPresentSeen = true; firstPaint = true } + if s.presentCount == estabStableFrames { reachedStableFrames = true } + } browsersLock.unlock() - if firstPaint { firstPresentArrived(bid) } // cancel the watchdog (once) + if firstPaint { + if ProcessInfo.processInfo.environment["FLUTTER_CEF_DEBUG"] != nil { + NSLog("[cef] FIRSTPAINT browser \(bid)") // one-shot, timestamped — cascade probe + } + // A browser that painted ANY frame is alive + has content (NOT blank) — cancel + // the watchdog now. (Gating the cancel on the frame threshold falsely recreated + // STATIC real sites that paint a short burst < threshold then idle.) + firstPresentArrived(bid) + // Pacer settle path: admit the next create after the settle window — covers + // static content that won't reach the frame threshold. The threshold below is + // the faster path for continuously-animating content; whichever fires first + // wins (advanceCreatePacer is idempotent). + let id = bid + DispatchQueue.global().asyncAfter(deadline: .now() + estabSettle) { [weak self] in + self?.advanceCreatePacer(after: id, timedOut: false) + } + } + if reachedStableFrames { advanceCreatePacer(after: bid, timedOut: false) } session?.handleFrame(op, payload) } } @@ -989,14 +1116,13 @@ final class CefProfileHost { // H6: abandon paced creates — the host is gone. 
Sessions stay in `browsers`, so // the onHostDied → plugin path still emits processGone for each queued one. createSendQueue.removeAll() - createPacerRunning = false - createInFlight = nil + createInFlight.removeAll() let p = process // H5: TAKE the posix_spawn pid (zero it) so this reaper is the SOLE owner of its // waitpid — a later terminateProcess()/shutdown() then sees 0 and won't // double-reap a pid this thread is about to harvest (which could kill an // OS-recycled pid). If it's wedged and we can't reap within the grace window - // below, we hand it back (restoreSpawnedPid) so terminateProcess can SIGKILL it. + // below, we SIGKILL + reap it ourselves so it never leaks as a zombie/orphan. let pid = spawnedPid spawnedPid = 0 let died = onHostDied @@ -1053,24 +1179,21 @@ final class CefProfileHost { } usleep(50_000) } - // H5: still alive after the grace window (wedged child that didn't exit on - // EOF) — hand the pid back so terminateProcess()/shutdown() can SIGTERM/SIGKILL - // + reap it. Without this, a taken-but-unreaped pid would never be killed. - if !reaped { self?.restoreSpawnedPid(pid) } + // H5: still alive after the grace window (a wedged child that didn't exit on + // EOF). Don't merely hand it back — the clean-shutdown path may never call + // terminateProcess() again, leaving a zombie/orphan cef_host. SIGKILL + reap it + // right here. We exclusively own this pid (spawnedPid was zeroed above) and it + // is still unreaped, so it can't be a recycled or relaunched pid. + if !reaped { + kill(pid, SIGKILL) + var raw: Int32 = 0 + waitpid(pid, &raw, 0) // blocking reap, off the main thread + } } DispatchQueue.main.async { died?(status) } } } - /// H5: hand a TAKEN-but-unreaped pid back to `spawnedPid` so terminateProcess() can - /// finish it (SIGTERM/SIGKILL + reap). No-op if a relaunch already installed a new - /// pid (so a racing relaunch is never clobbered). 
- private func restoreSpawnedPid(_ pid: pid_t) { - writeLock.lock() - if spawnedPid == 0 { spawnedPid = pid } - writeLock.unlock() - } - /// Process/profile-level inbound frames (browserId 0): opReady (carries the /// ad-hoc build flag, gates the create flush) and process logs. private func handleProcessFrame(_ op: UInt8, _ payload: [UInt8]) { diff --git a/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift b/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift index 77d86b7..b68d69b 100644 --- a/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift +++ b/packages/flutter_cef_macos/macos/Classes/CefWebSession.swift @@ -116,11 +116,20 @@ final class CefWebSession: NSObject, FlutterTexture { // CefProfileHost under its browsersLock (the reader flips it there) — a cheap per-frame // first-paint check that avoids a second lock on the hot paint path. var firstPresentSeen = false + // Count of present frames delivered (guarded by CefProfileHost.browsersLock, like + // firstPresentSeen). The create-pacer advances to the next browser only once this + // reaches a small threshold — i.e. the browser is STABLY producing, not just one + // first frame — so the next create's first-frame GPU allocation can't knock a barely- + // established browser back out. + var presentCount = 0 private weak var registry: FlutterTextureRegistry? private var width: Int private var height: Int - private let dpr: CGFloat + // Device pixel ratio. Mutable: a canvas-zoom crispness re-render changes it (same logical + // w/h, higher density) so the surface reallocates at logical*dpr px. Guarded by bufferLock + // (read on the host reader thread via `scale`/createSnapshot). + private var dpr: CGFloat private var ioSurface: IOSurfaceRef? private var pixelBuffer: CVPixelBuffer? 
@@ -141,6 +150,7 @@ final class CefWebSession: NSObject, FlutterTexture { private var resizeInFlight = false private var pendingRequestedW = 0 private var pendingRequestedH = 0 + private var pendingRequestedDpr: CGFloat = 0 // 0 = no dpr change requested private var resizeSentAtNs: UInt64 = 0 // Bumped on every sendResize. The resize watchdog captures it and bails if a newer resize // has since gone out — so during a smoothly-advancing drag the watchdog is a no-op, and it @@ -154,12 +164,12 @@ final class CefWebSession: NSObject, FlutterTexture { bufferLock.lock(); defer { bufferLock.unlock() } return ioSurface.map { IOSurfaceGetID($0) } ?? 0 } - // Geometry, exposed for the host's opCreateBrowser payload. width/height are + // Geometry, exposed for the host's opCreateBrowser payload. width/height/dpr are // mutated by resize() on the main thread and read by the host on its reader - // thread, so guard them with bufferLock (dpr is immutable, so scale needn't). + // thread, so guard them with bufferLock. var w: Int { bufferLock.lock(); defer { bufferLock.unlock() }; return width } var h: Int { bufferLock.lock(); defer { bufferLock.unlock() }; return height } - var scale: CGFloat { dpr } + var scale: CGFloat { bufferLock.lock(); defer { bufferLock.unlock() }; return dpr } init(sessionId: String, width: Int, height: Int, dpr: CGFloat, registry: FlutterTextureRegistry) { @@ -169,7 +179,7 @@ final class CefWebSession: NSObject, FlutterTexture { self.dpr = dpr self.registry = registry super.init() - if let (surf, buffer) = makeBuffers(self.width, self.height) { + if let (surf, buffer) = makeBuffers(self.width, self.height, self.dpr) { publishBuffers(surf, buffer, self.width, self.height) } self.textureId = registry.register(self) @@ -188,24 +198,36 @@ final class CefWebSession: NSObject, FlutterTexture { // MARK: FlutterTexture + private var diagCopyCount = 0 // DIAG + private var diagPresentCount = 0 // DIAG func copyPixelBuffer() -> Unmanaged? 
{ bufferLock.lock() defer { bufferLock.unlock() } + diagCopyCount += 1 // DIAG — logged BEFORE the nil guard so a nil-buffer session shows + if ProcessInfo.processInfo.environment["FLUTTER_CEF_DEBUG"] != nil + && diagCopyCount % 120 == 0 { + let liveSid = pixelBuffer.flatMap { CVPixelBufferGetIOSurface($0) }.map { IOSurfaceGetID($0.takeUnretainedValue()) } ?? 0 + let latestSid = ioSurface.map { IOSurfaceGetID($0) } ?? 0 + NSLog("[cefdiag] copy bid=\(browserId) tex=\(textureId) hasPB=\(pixelBuffer != nil) liveSurf=\(liveSid) latestSurf=\(latestSid) inFlight=\(resizeInFlight) pendSurf=\(pendingSurfaceId)") + } guard let pb = pixelBuffer else { return nil } return Unmanaged.passRetained(pb) } // MARK: Public control - func resize(width newW: Int, height newH: Int) { + func resize(width newW: Int, height newH: Int, dpr newDpr: CGFloat) { let w = max(1, newW), h = max(1, newH) + let d = newDpr > 0 ? newDpr : dpr // 0/invalid keeps the current density bufferLock.lock() - // Always record the latest requested size; it's what maybeSendNextResize sends when the - // in-flight resize promotes. + // Always record the latest requested size+dpr; it's what maybeSendNextResize sends when + // the in-flight resize promotes. pendingRequestedW = w pendingRequestedH = h + pendingRequestedDpr = d let blocked = resizeInFlight - let same = (w == width && h == height) + // A dpr change (canvas-zoom crispness) needs a reallocation just like a size change. + let same = (w == width && h == height && d == dpr) bufferLock.unlock() // While a resize is still painting, just record the latest size (above). Its present sends // the next one (maybeSendNextResize); if cef_host drops that paint, the resizeWatchdog @@ -214,18 +236,18 @@ final class CefWebSession: NSObject, FlutterTexture { // → froze mid-drag). NOTE: no inline timeout here — racing ahead on a slow/heavy page is // exactly what desynced the presents and left the page stuck; the watchdog handles wedges. 
if blocked || same { return } - sendResize(w, h) + sendResize(w, h, d) } /// Allocate the new surface, point cef_host at it, and send the resize — marking it /// in-flight so the next size waits for this one's present (see resize()/maybeSendNextResize). /// Only ever called on the main thread (resize / maybeSendNextResize), so sendFrame stays /// serialized. - private func sendResize(_ w: Int, _ h: Int) { - // Create the new surface OUTSIDE the lock (expensive). H4: publish surface id + new - // dims ATOMICALLY in one bufferLock section so a concurrent host read (createSnapshot - // on the reader thread) can't see new dims with the old surface id. - guard let (surf, buffer) = makeBuffers(w, h) else { return } + private func sendResize(_ w: Int, _ h: Int, _ d: CGFloat) { + // Create the new surface OUTSIDE the lock (expensive) at the requested density. H4: + // publish surface id + new dims ATOMICALLY in one bufferLock section so a concurrent + // host read (createSnapshot on the reader thread) can't see new dims with the old id. + guard let (surf, buffer) = makeBuffers(w, h, d) else { return } let sid = IOSurfaceGetID(surf) guard sid != 0 else { return } // Resize-flash fix: point the host at the NEW surface (ioSurface drives surfaceId / @@ -239,6 +261,7 @@ final class CefWebSession: NSObject, FlutterTexture { pendingSurfaceId = sid width = w height = h + dpr = d resizeInFlight = true resizeSentAtNs = nowNs() resizeGen &+= 1 @@ -248,6 +271,7 @@ final class CefWebSession: NSObject, FlutterTexture { appendU32(&payload, UInt32(w)) appendU32(&payload, UInt32(h)) appendU32(&payload, sid) + appendF64(&payload, Double(d)) // cef_host updates slot->dpr → re-renders at new density sendFrame(Self.opResize, payload) // Re-kick this resize if its present never lands (see resizeWatchdog). During a smoothly // advancing drag gen keeps moving and this no-ops; it only bites a genuine wedge. 
@@ -303,9 +327,10 @@ final class CefWebSession: NSObject, FlutterTexture { private func maybeSendNextResize() { bufferLock.lock() let w = pendingRequestedW, h = pendingRequestedH - let need = !resizeInFlight && w > 0 && (w != width || h != height) + let d = pendingRequestedDpr > 0 ? pendingRequestedDpr : dpr + let need = !resizeInFlight && w > 0 && (w != width || h != height || d != dpr) bufferLock.unlock() - if need { sendResize(w, h) } + if need { sendResize(w, h, d) } } private func nowNs() -> UInt64 { DispatchTime.now().uptimeNanoseconds } @@ -456,13 +481,18 @@ final class CefWebSession: NSObject, FlutterTexture { /// H4: CREATE an IOSurface + CVPixelBuffer for (w,h) but do NOT publish them — the /// caller publishes surface + geometry atomically via publishBuffers so a concurrent /// createSnapshot()/copyPixelBuffer never sees a surface and dims out of sync. - private func makeBuffers(_ w: Int, _ h: Int) -> (IOSurfaceRef, CVPixelBuffer)? { + private func makeBuffers(_ w: Int, _ h: Int, _ scale: CGFloat) -> (IOSurfaceRef, CVPixelBuffer)? { // Allocate at PHYSICAL (Retina) resolution = logical * dpr, so the texture // is crisp on HiDPI displays; cef_host renders the OSR buffer at the same // scale (via GetScreenInfo.device_scale_factor). 64-byte-aligned stride keeps - // the IOSurface Metal/CVPixelBuffer-compatible. - let pw = max(1, Int((Double(w) * Double(dpr)).rounded())) - let ph = max(1, Int((Double(h) * Double(dpr)).rounded())) + // the IOSurface Metal/CVPixelBuffer-compatible. `scale` is passed (not read from + // self.dpr) so a resize that changes dpr allocates at the NEW density. Clamp to the + // same ceiling cef_host enforces (dpr<=8): the shipped widget already clamps, but the + // public CefWebController.resize(dpr:) does not, and an unclamped dpr is an O(dpr^2) + // allocation AND would desync the host scale (host caps at 8, surface wouldn't). 
+ let s = min(max(Double(scale), 0.5), 8.0) + let pw = max(1, Int((Double(w) * s).rounded())) + let ph = max(1, Int((Double(h) * s).rounded())) let bytesPerRow = ((pw * 4) + 63) & ~63 let props: [CFString: Any] = [ kIOSurfaceWidth: pw, @@ -517,8 +547,11 @@ final class CefWebSession: NSObject, FlutterTexture { /// (Retina) pixel dims = logical * dpr. private func notifySurface(_ sid: UInt32, _ logicalW: Int, _ logicalH: Int) { guard sid != 0 else { return } - onSurface?(sid, Int((Double(logicalW) * Double(dpr)).rounded()), - Int((Double(logicalH) * Double(dpr)).rounded())) + // dpr is mutable (canvas-zoom crispness) and read off the reader thread here, so snapshot + // it under bufferLock; then invoke onSurface UNLOCKED (the callback reads session accessors). + bufferLock.lock(); let s = Double(dpr); bufferLock.unlock() + onSurface?(sid, Int((Double(logicalW) * s).rounded()), + Int((Double(logicalH) * s).rounded())) } /// Re-emit the current live surface to a just-attached onSurface consumer. The init @@ -572,6 +605,11 @@ final class CefWebSession: NSObject, FlutterTexture { } let tid = textureId bufferLock.unlock() + diagPresentCount += 1 // DIAG + if ProcessInfo.processInfo.environment["FLUTTER_CEF_DEBUG"] != nil + && diagPresentCount % 120 == 0 { + NSLog("[cefdiag] present bid=\(browserId) tex=\(tid) count=\(diagPresentCount)") + } // R2: a resized surface just went live — tell WebRTC consumers to re-point their // IOSurface capture at the new id (this is the "fires on each resize" half). 
if promotedSid != 0 { notifySurface(promotedSid, promotedW, promotedH) } diff --git a/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift b/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift index bd27464..bb77543 100644 --- a/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift +++ b/packages/flutter_cef_macos/macos/Classes/FlutterCefPlugin.swift @@ -27,7 +27,22 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { // go straight to ephemeral instead of racing onto a doomed shared host. private var adhocBlockedProfiles: Set = [] + /// Raise the soft open-file limit toward the hard cap (best-effort, once at plugin + /// registration). Each cef_host costs several fds (IPC + CDP pipes + per-relay + /// listener), so many agent-controlled tiles can approach a GUI app's default soft + /// RLIMIT_NOFILE (often 256) and fail spawns with EMFILE. + private static func raiseOpenFileLimit() { + var rl = rlimit() + guard getrlimit(RLIMIT_NOFILE, &rl) == 0 else { return } + let want: rlim_t = 4096 + if rl.rlim_cur < want { + rl.rlim_cur = min(want, rl.rlim_max) + _ = setrlimit(RLIMIT_NOFILE, &rl) + } + } + public static func register(with registrar: FlutterPluginRegistrar) { + raiseOpenFileLimit() let instance = FlutterCefPlugin() instance.textureRegistry = registrar.textures let channel = FlutterMethodChannel( @@ -587,7 +602,8 @@ public class FlutterCefPlugin: NSObject, FlutterPlugin { private func resize(_ a: [String: Any], _ result: @escaping FlutterResult) { if let id = a["sessionId"] as? String, let s = sessions[id] { - s.resize(width: a["width"] as? Int ?? 800, height: a["height"] as? Int ?? 600) + s.resize(width: a["width"] as? Int ?? 800, height: a["height"] as? Int ?? 600, + dpr: (a["dpr"] as? Double).map { CGFloat($0) } ?? 
0) result(["textureId": s.textureId]) } else { result(nil) diff --git a/packages/flutter_cef_macos/native/cef_host/main.mm b/packages/flutter_cef_macos/native/cef_host/main.mm index f5942e1..bf7a287 100644 --- a/packages/flutter_cef_macos/native/cef_host/main.mm +++ b/packages/flutter_cef_macos/native/cef_host/main.mm @@ -181,6 +181,12 @@ // (not a single global) so paints on independent browsers don't contend. std::mutex surface_mutex; IOSurfaceRef surface = nullptr; // host-shared IOSurface we paint into + // Cached Metal wrap of `surface` for the GPU-blit DEST. Wrapping it fresh every + // frame is pure churn (the surface is stable except on resize), so cache it and + // recreate only when the wrapped IOSurface id changes. Released wherever `surface` + // is. Guarded by surface_mutex. MRC: holds the +1 from newTextureWithDescriptor. + id dst_mtl = nil; + uint32_t dst_mtl_sid = 0; int width = 800; // logical (DIP) — GetViewRect; CEF scales by dpr. int height = 600; double dpr = 1.0; // device pixel ratio; the IOSurface is logical*dpr px. @@ -231,6 +237,16 @@ // DoSetVisible); `begin_frame_pump_started` guards a double-start. UI-thread only. bool visible = true; bool begin_frame_pump_started = false; + // Per-slot pump-tick + accelerated-paint counters, logged from PumpBeginFrame when + // FLUTTER_CEF_DEBUG is set — diagnostics for paint-stall investigation at scale. + uint64_t diag_pump_ticks = 0; + uint64_t diag_paint_count = 0; + // about:blank-first (FLUTTER_CEF_BLANK_FIRST): the real URL to navigate to AFTER the + // browser establishes on about:blank. Establishing on blank makes the first-frame GPU + // handshake near-instant (so the create-pacer releases its slot fast), decoupling + // establishment from the real page's load time. Navigated + cleared on first paint. + // UI-thread only. + std::string pending_nav_url; }; // Routing map from a wire browser id to its Slot. MUTATED ONLY ON THE CEF UI @@ -252,6 +268,7 @@ return it == g_slots_by_wire_id.end() ? 
nullptr : it->second; } +void SendLog(uint32_t browser_id, const std::string& msg); // DIAG fwd decl (defined below) // External begin-frame pump. window_info.external_begin_frame_enabled (set in DoCreateBrowser) // turns OFF CEF's internal frame timer, so the GPU/Viz compositor produces a frame ONLY when we // call SendExternalBeginFrame — which, unlike Invalidate(), deterministically drives one frame @@ -264,6 +281,12 @@ void PumpBeginFrame(uint32_t wire_id) { std::shared_ptr slot = LookupWireId(wire_id); if (!slot || !slot->browser) return; // disposed mid-flight — let the pump die if (slot->visible) slot->browser->GetHost()->SendExternalBeginFrame(); + slot->diag_pump_ticks++; // DIAG + if (std::getenv("FLUTTER_CEF_DEBUG") && slot->diag_pump_ticks % 120 == 0) + SendLog(wire_id, "diag wire=" + std::to_string(wire_id) + + " pumpTicks=" + std::to_string(slot->diag_pump_ticks) + + " paints=" + std::to_string(slot->diag_paint_count) + + " visible=" + std::to_string(slot->visible ? 1 : 0)); CefPostDelayedTask(TID_UI, base::BindOnce(&PumpBeginFrame, wire_id), slot->visible ? 16 : 100); } @@ -676,18 +699,28 @@ void CompositeMetalLocked(IOSurfaceRef view_src) { height:sh mipmapped:NO]; sd.storageMode = MTLStorageModeShared; - MTLTextureDescriptor* dd = [MTLTextureDescriptor - texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm - width:dw - height:dh - mipmapped:NO]; - dd.storageMode = MTLStorageModeShared; + // src wraps CEF's pooled view_src — it rotates, so wrap per-call. id src = [g_mtl_device newTextureWithDescriptor:sd iosurface:view_src plane:0]; - id dst = [g_mtl_device newTextureWithDescriptor:dd - iosurface:slot_->surface - plane:0]; + // dst wraps slot_->surface (stable except on resize) — cache it and only + // recreate when the wrapped surface id changes, halving per-frame texture + // churn on the GPU thread. 
+ const uint32_t dsid = IOSurfaceGetID(slot_->surface); + if (slot_->dst_mtl == nil || slot_->dst_mtl_sid != dsid) { + [slot_->dst_mtl release]; + MTLTextureDescriptor* dd = [MTLTextureDescriptor + texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm + width:dw + height:dh + mipmapped:NO]; + dd.storageMode = MTLStorageModeShared; + slot_->dst_mtl = [g_mtl_device newTextureWithDescriptor:dd + iosurface:slot_->surface + plane:0]; + slot_->dst_mtl_sid = dsid; + } + id dst = slot_->dst_mtl; // cached (released on resize/close) if (src && dst) { const int cw = std::min(sw, dw), ch = std::min(sh, dh); id cb = [g_mtl_queue commandBuffer]; @@ -707,7 +740,7 @@ void CompositeMetalLocked(IOSurfaceRef view_src) { blitted = true; } [src release]; - [dst release]; + // dst is cached on the Slot (released on resize/close), not per-frame. } } if (blitted) { @@ -740,6 +773,16 @@ void CompositeMetalLocked(IOSurfaceRef view_src) { void OnAcceleratedPaint(CefRefPtr, PaintElementType type, const RectList&, const CefAcceleratedPaintInfo& info) override { + slot_->diag_paint_count++; // DIAG + // about:blank-first: the browser has established (first paint on about:blank) — now + // navigate to the real URL. The establishment slot has already been released by this + // paint, so the real page loads WITHOUT holding a serial slot (concurrent with the + // other tiles' loads). Fires once (pending_nav_url cleared). UI thread. 
+ if (!slot_->pending_nav_url.empty() && slot_->browser) { + std::string nav = slot_->pending_nav_url; + slot_->pending_nav_url.clear(); + if (auto frame = slot_->browser->GetMainFrame()) frame->LoadURL(nav); + } IOSurfaceRef src = reinterpret_cast(info.shared_texture_io_surface); if (!src) { @@ -1079,6 +1122,9 @@ void OnBeforeClose(CefRefPtr browser) override { IOSurfaceRef old = slot_->surface; slot_->surface = nullptr; if (old) CFRelease(old); + [slot_->dst_mtl release]; + slot_->dst_mtl = nil; + slot_->dst_mtl_sid = 0; } slot_->browser = nullptr; } @@ -1134,6 +1180,19 @@ bool OnBeforeBrowse(CefRefPtr browser, CefRefPtr frame, } void OnBeforeCommandLineProcessing( const CefString&, CefRefPtr command_line) override { + // OSR establishment-latency: OSR views have no real OS window, so Chromium's scheduler + // treats every renderer as backgrounded/occluded and LOWERS its process priority during + // the critical first load — delaying the first frame of a tile that's actually visible + // in our canvas. Keeping the renderer at full priority ~halved time-to-first-paint for a + // 20-real-site board (measured), with no race/security change. Default ON; opt out with + // FLUTTER_CEF_KEEP_BG_THROTTLE for debugging. NOTE: we deliberately do NOT add + // --disable-background-timer-throttling — that would keep HIDDEN (off-screen, WasHidden) + // tiles' JS timers running hot, fighting the off-screen-is-cheap property; the priority + // flags below are what speed establishment without that cost. + if (!std::getenv("FLUTTER_CEF_KEEP_BG_THROTTLE")) { + command_line->AppendSwitch("disable-renderer-backgrounding"); + command_line->AppendSwitch("disable-backgrounding-occluded-windows"); + } #ifdef CEF_HOST_ADHOC // Dev / ad-hoc-only (CEF_HOST_ADHOC is ON by default; a signed release sets // -DCEF_HOST_ADHOC=OFF). 
Mock keychain + basic password store so a launch @@ -1274,6 +1333,16 @@ void DoCreateBrowser(uint32_t wire_id, int w, int h, double dpr, uint32_t sid, window_info.external_begin_frame_enabled = true; CefBrowserSettings settings; settings.windowless_frame_rate = 60; + // about:blank-first: for a real http(s) URL, establish on about:blank (near-instant + // first frame → the pacer's establishment slot frees fast) and defer the real + // navigation to first paint. Skip for data:/file:/about: (already instant) and when the + // env flag is off. + std::string create_url = url; + if (std::getenv("FLUTTER_CEF_BLANK_FIRST") && + (url.rfind("http://", 0) == 0 || url.rfind("https://", 0) == 0)) { + slot->pending_nav_url = url; + create_url = "about:blank"; + } CefRefPtr client = new HostClient(slot); // H3: ASYNC create. CreateBrowserSync BLOCKS this (the single CEF UI) thread until // the renderer + GPU/Viz accelerated-surface handshake completes — so a burst of @@ -1283,7 +1352,7 @@ void DoCreateBrowser(uint32_t wire_id, int w, int h, double dpr, uint32_t sid, // in HostClient::OnAfterCreated, which acks kOpCreated so the host's pacer sends the // NEXT create — serialized by COMPLETION, not a wall-clock guess. 
bool dispatched = CefBrowserHost::CreateBrowser( - window_info, client, url, settings, nullptr, nullptr); + window_info, client, create_url, settings, nullptr, nullptr); if (!dispatched) { // H7: the create couldn't even be dispatched — OnAfterCreated/OnBeforeClose will // never fire, so reclaim the slot + the looked-up IOSurface (+1 ref) here (else @@ -1300,6 +1369,9 @@ void DoCreateBrowser(uint32_t wire_id, int w, int h, double dpr, uint32_t sid, CFRelease(slot->surface); slot->surface = nullptr; } + [slot->dst_mtl release]; + slot->dst_mtl = nil; + slot->dst_mtl_sid = 0; } if (std::getenv("FLUTTER_CEF_DEBUG")) fprintf(stderr, "[cef_host] createBrowser wire=%u dispatched=%d\n", wire_id, @@ -1324,7 +1396,7 @@ void DoDisposeBrowser(uint32_t wire_id) { } void DoResize(const std::shared_ptr& slot, int w, int h, - uint32_t surface_id) { + uint32_t surface_id, double dpr) { if (w < 1 || w > 16384 || h < 1 || h > 16384) { SendLog(slot->browser_id, "resize: out-of-range dims " + std::to_string(w) + "x" + std::to_string(h)); @@ -1336,14 +1408,28 @@ void DoResize(const std::shared_ptr& slot, int w, int h, "resize: IOSurfaceLookup failed for id " + std::to_string(surface_id)); return; } + // dpr <= 0 means "unchanged" (older/short wire frames). A new dpr (a canvas-zoom + // crispness re-render: same logical w/h, higher device-scale) makes GetScreenInfo + // report the new scale so CEF re-rasterizes the page at logical*dpr to fill the + // host's freshly-reallocated (bigger) IOSurface. 
+ bool dpr_changed = false; { std::lock_guard lock(slot->surface_mutex); if (slot->surface) CFRelease(slot->surface); slot->surface = next; // owns the +1 from Lookup slot->width = w; slot->height = h; + if (dpr > 0.0 && dpr != slot->dpr) { + slot->dpr = dpr; + dpr_changed = true; + } + [slot->dst_mtl release]; // stale: wrapped the old surface + slot->dst_mtl = nil; + slot->dst_mtl_sid = 0; } if (slot->browser) { + // A device-scale change needs the renderer told (screen info), not just a relayout. + if (dpr_changed) slot->browser->GetHost()->NotifyScreenInfoChanged(); slot->browser->GetHost()->WasResized(); // Drive a frame right now at the new size. With external begin-frame this is a guaranteed // tick (not a coalesce-able Invalidate request), so the re-laid-out content composites into @@ -1803,7 +1889,10 @@ void IpcReadLoop() { int w = static_cast(ReadU32BE(p)); int h = static_cast(ReadU32BE(p + 4)); uint32_t sid = ReadU32BE(p + 8); - CefPostTask(TID_UI, base::BindOnce(&DoResize, slot, w, h, sid)); + // Optional trailing f64 dpr (crispness re-render); 0 / absent = unchanged. + double dpr = (plen >= 20) ? ReadF64BE(p + 12) : 0.0; + if (dpr < 0.0 || dpr > 8.0) dpr = 0.0; // guard a bad/forged dpr + CefPostTask(TID_UI, base::BindOnce(&DoResize, slot, w, h, sid, dpr)); break; } case kOpNavigate: { diff --git a/specs/osr-ecosystem-survey.md b/specs/osr-ecosystem-survey.md new file mode 100644 index 0000000..f578532 --- /dev/null +++ b/specs/osr-ecosystem-survey.md @@ -0,0 +1,157 @@ +# CEF Off-Screen Rendering (OSR) in the Wild — Survey & Scaling Synthesis + +**Question driving this survey:** when many OSR Chromium views animate at once on **one** shared `cef_host` process, ~1–4 of 12 never produce a first frame (blank). Proven mechanism: OSR pixels exit via a **per-browser `viz::FrameSinkVideoCapturer`**; one GPU/Viz process sustains only ~8–10 concurrently-capturing animating views before late ones starve. 
The candidate fix is **multiple `cef_host` processes (more GPU processes) + a take-turns throttle**. + +This report consolidates four implementation clusters (bindings, game-engines, streaming/broadcast, electron-desktop), an internals/scaling cluster, and three gap deep-dives (cloud pixel-streaming fleets; CEF hidden→shown first-paint failures; external-begin-frame pacing/QCefView sync). + +--- + +## 1. Comparison table of notable CEF-OSR implementations + +| Project | Category | Pixel-out path | Accel / shared-texture | FPS control | Runs many OSR? | Scaling approach | +|---|---|---|---|---|---|---| +| **CefSharp** (.NET) | binding | OnPaint CPU readback (default) + OnAcceleratedPaint | Windows **D3D11 handle only**; surfaces handle, host must render it | `windowless_frame_rate` (30 default); `SendExternalBeginFrame` | No orchestration | none built-in; #2940 shows ~1–3 fps callback cadence even with empty body | +| **JCEF / java-cef** (incl. JetBrains JBR) | binding | **OnPaint CPU only** (no accel binding; #506 open) | None | `windowless_frame_rate` (30) | JetBrains runs several, all CPU | none; stays on CPU deliberately | +| **cefpython** | binding | **OnPaint CPU only** (accel removed) | None | `windowless_frame_rate` (caps ~30); recommends `--disable-gpu` | Single-browser examples | go **software** to dodge GPU ceiling (loses WebGL) | +| **CEF4Delphi** | binding | OnPaint + OnAcceleratedPaint | **Cross-platform**: Win D3D11 handle / **mac IOSurface** / Linux dmabuf-fds | `windowless_frame_rate` + external begin-frame | No pooling | never shipped a heavy-OSR demo | +| **CefGlue** (.NET/Avalonia) | binding | OnPaint CPU (default) | Optional Win-D3D11 in some forks | `windowless_frame_rate` | No | none documented | +| **cef-rs** (Rust) | binding | No documented OSR surface | — | — | No | n/a (pre-1.0, windowed-focused) | +| **cef-mixer** (daktronics/mediabuff) | demo | OnAcceleratedPaint | **Win D3D11 zero-copy** | **`SendExternalBeginFrame`** 
(windowless_frame_rate ignored) | **Yes** — `--grid=2x2`, N browsers composited | host-driven begin-frame; small grids only (never stressed to 12) | +| **cef-spout** | engine-asset | OnAcceleratedPaint (Win); OnPaint fallback elsewhere | Win D3D11 + Spout re-share | external begin-frame | Yes (grid) | per-instance process + unique `--cache-path` | +| **Vuplex 3D WebView** (Unity, commercial) | engine-asset | Win D3D11 OnAcceleratedPaint; **macOS = CPU OnPaint** | Win only; **no accel on Mac** | `SetTargetFrameRate` (60) | Yes, many in one process | **accepts ~10** ("usually 10 active webviews"); no multi-process | +| **ZFBrowser** (Unity) | engine-asset | CPU OnPaint → shared memory → re-upload | None (GPU→CPU→GPU) | internal cap | Yes (one helper proc) | shrink animating surfaces | +| **UnityWebBrowser (UWB)** | engine-asset | CPU buffer over IPC (TCP default) | None | internal cap | **one engine process per browser** | structural multi-process (for isolation, CPU path) | +| **Unreal WebUI / UCefView** | framework | OnPaint (stock) + OnAcceleratedPaint (Web UI/UCefView) | Win D3D11 shared texture into RHI | `windowless_frame_rate`-style | A few layered widgets | layering; one CEF/GPU proc; flicker if texture held | +| **OBS obs-browser** | app | OnPaint (default) + OnAcceleratedPaint | **Win NT handle / mac IOSurface / Linux dmabuf** | `windowless_frame_rate` (max 60); per-source FPS | **Yes — largest real consumer** | **cap to 3-4 sources + shutdown-when-hidden** | +| **Streamlabs Desktop** | app | (inherits obs-browser) | same | same | Yes | same cap-and-hide guidance | +| **vMix** | app | CEF OSR → D3D (Win only) | not user-exposed | global perf mode | Per-input Chromium | shrink resolution; avoid multi-GPU | +| **TouchDesigner Web Render TOP** | app | CEF3 OSR; shared-mem default | **Win D3D11 shared-texture opt-in** | `maxrenderrate` target | **Yes — process(es) per Web Render TOP** | **multi-process per view** (closest precedent to our plan) | +| 
**SpoutBrowser** | alternative | OSR shared texture → Spout | Win D3D11 | `--off-screen-frame-rate` (30/60) | Yes | separate process + unique `--cache-path` | +| **Electron OSR (default)** | framework | `paint` NativeImage = CPU readback | No | `setFrameRate` (≤240); damage-driven | Multiple offscreen windows | none; per-window capturer, one GPU proc | +| **Electron OSR (`useSharedTexture`)** | framework | OnAcceleratedPaint-equivalent via Viz GMB pool | **Win D3D11 / mac IOSurface / Linux dmabuf** | `setFrameRate`; 240-cap removed for shared-tex | Multiple, no pooling | none; documents `kFramePoolCapacity=10` + copy-then-release | +| **Neko / Kasm / BrowserBox** (cloud fleets) | alternative-arch | **whole-display capture** (Xvfb/X11) or CDP screencast — **NOT** per-browser OSR | n/a (HW video encode: VAAPI/NVENC) | encoder-paced; KasmVNC down-scales | **Yes, dozens–hundreds** | **one capture per display; scale = more containers/processes** | +| **Coherent Gameface / Ultralight** | alternative-engine | n/a (not CEF) — renders inline with host | n/a | host-frame | Yes, many views | abandon Chromium OSR model entirely | + +*Uncertain/version-sensitive:* CefSharp's exact accelerated-path fps, vMix internals, XSplit's OSR path (docs too thin — omitted from the table beyond a note that it's CEF-class). + +--- + +## 2. Who actually runs MANY simultaneous OSR browsers — and how they cope + +**Real many-OSR-browser consumers are rare, and none raises the in-flight cap. They ration *active* capturers:** + +- **OBS Studio (obs-browser)** — the largest, most-stressed CEF-OSR consumer. Its own guidance: **"Limit to 3-4 browser sources maximum"** because "the GPU cannot render all your sources quickly enough." Second lever: **"Shutdown source when not visible"** (kills the Chromium process when hidden) — the direct analog of an off-screen visibility gate. 
The **OBS 30.2→31 regression** (#470: "15+ videos in iframes" → "more than a couple at a time freezes all the videos," after the new CEF 127 shared-texture impl) and **#468** ("hang on a frame for 250ms, repeating") are the clearest public reproductions of our ceiling, confirmed on 3 machines. + +- **Vuplex (Unity, commercial)** publishes a number: **"usually 10 active webviews on Windows, macOS, Android without performance issues."** This independently corroborates both our empirical ~8–10 ceiling and Chromium's `kDesignLimitMaxFrames = 10`. Their answer to scale is **not** multi-process — they accept ~10 and ship plain **CPU OnPaint on macOS**. + +- **cef-mixer** composites a grid of independent browsers but only at small counts (2×2/3×3) — it demonstrates the pattern, never stresses the ceiling. + +- **TouchDesigner** runs **multiple CEF process groups per Web Render TOP** and scales acceptably — the strongest existing precedent for our multi-process direction (caveat below). + +**Who looks like a many-browser app but isn't OSR:** Spotify, Steam, Battle.net, Epic, GOG, Discord, Slack, VS Code — all **windowed** CEF/Electron (native HWND/NSView). They never touch `FrameSinkVideoCapturer` and offer **no** evidence that many-simultaneous-OSR scales. This is a meaningful negative result: the OSR-into-texture niche has **no large public app running 12 concurrently-animating OSR browsers.** + +**The one industry that genuinely runs dozens–hundreds of live browsers** (Neko, Kasm/KasmVNC, BrowserBox) **categorically avoids per-browser OSR.** They render *windowed* Chromium into a virtual display and capture the **whole display once** at the X-server/compositor level, then HW-encode (VAAPI/NVENC). There is exactly **one capturer per display**, so the per-capturer pool ceiling never arises — and they scale "more browsers" as **more containers/processes**, never more capturers in one process. + +--- + +## 3.
State of the art for getting pixels out at scale + +**Two paths, same source:** + +1. **OnPaint (CPU readback)** — historical default; GPU→CPU copy per frame. Hosts the documented "every other frame dropped" 30fps behavior (`CropScaleReadbackAndCleanMailbox` can't keep up at 60Hz). +2. **OnAcceleratedPaint (GPU shared texture)** — Win D3D11 NT handle / **macOS IOSurfaceRef** / Linux dmabuf. Lowers **per-frame cost** but **does not change the per-browser capturer concurrency model**. + +**Critical: both paths route through `viz::FrameSinkVideoCapturer` → `OnFrameCaptured`** (confirmed in CEF #3730). The accelerated path merely swaps a CPU `CopyOutputRequest` target for a `GpuMemoryBuffer`/`MappableSharedImage`. Confirmed constants in `frame_sink_video_capturer_impl.h`: + +``` +kDesignLimitMaxFrames = 10; +kFramePoolCapacity = kDesignLimitMaxFrames + 1; // 11 +kTargetPipelineUtilization = 0.6f; // "red line" ≈ 6 in-flight +``` + +These are **per-capturer (per browser)**, independently confirmed from a second codebase — **Electron's OSR README states `kFramePoolCapacity=10` verbatim.** + +**True zero-copy is impossible — independently re-derived by Electron and CEF.** The pool hands a **different** texture each frame and reclaims it on `release()`/callback return. Electron's PR #42953 author: it's "actually one [copy], there's a CopyRequest of frame texture." Mandatory pattern everywhere: **open the handle → copy to your own intermediate texture → release immediately.** This matches our existing "GPU-blit-the-copy" conclusion. + +**macOS is the weak platform for accelerated OSR — and everyone knows it.** Upstream CEF historically did **not** call OnAcceleratedPaint on macOS; it required out-of-tree patches that **cannot rebase past Chromium ~103**, and the reference Metal POC is "slow and buggy." **Vuplex ships CPU OnPaint on Mac despite having D3D11 accel on Windows.** OBS ships **patched CEF (4183)** specifically to get macOS IOSurface OnAcceleratedPaint. 
Electron itself flags (#45428) that the macOS `useSharedTexture` path has "neither test nor documentation." + +> **Actionable uncertainty (verify):** confirm flutter_cef's `cef_host` is actually on the **patched IOSurface OnAcceleratedPaint** path. If it has silently fallen back to **CPU OnPaint**, the per-capturer ceiling is *much* worse (every-other-frame readback drop), and fixing the paint path would be higher-leverage than multi-process. + +**Multi-process at scale:** the cloud fleets and TouchDesigner/SpoutBrowser all scale by **more processes**, each owning **one** capture/encoder. SpoutBrowser surfaces a concrete gotcha: you need a **unique `--cache-path` per instance** or cefclient "reuses the main browser process." + +--- + +## 4. Does anyone solve many-simultaneous-animating OSR? + +**No one solves it *within the per-browser CEF OSR capturer model.* The genuine solutions all step outside it.** + +- **Pooling across processes (the cloud-fleet answer):** Neko/Kasm/BrowserBox **eliminate** N capturers by capturing **one display** (or using CDP `Page.startScreencast` per tab with `everyNthFrame` + `screencastFrameAck` backpressure). This is the only architecture that runs hundreds of live browsers. **But it doesn't fit our requirement** of independently-positioned, separately-zoomable per-tile Flutter textures — you'd need a tiling/scene-graph step to carve one capture back into per-tile textures. The transferable *principle* is "amortize the capture pool across surfaces," not the literal architecture. + +- **Take-turns / load-shed:** Chromium's own `VideoCaptureOracle` (drop resolution, not just fps; reduce ≤once/3s), KasmVNC "Video Mode" down-scaling, and CDP `everyNthFrame`+`Ack` are all **the same idea** — gate on completion/ack, shed load by lowering resolution/fps. Nobody enlarges the pipeline. + +- **Accelerated path avoiding the capturer:** **does not exist for CEF.** OnAcceleratedPaint still flows through `FrameSinkVideoCapturer`. 
The only engines that avoid a per-view capturer are **non-CEF**: Coherent Gameface, Ultralight, **Servo + surfman** (one WebRender context, N surfaces usable as host textures — offscreen + multi-webview landed 2024), and **WPE/WPEBackend-fdo** (per-view dmabuf/EGLImage export, "synchronization implicit, avoiding additional capture infrastructure"). These prove a general web engine *can* render N live views→host textures with **no** per-view capturer — but **none has a first-class macOS IOSurface story**, so they're architecture validation, not drop-in replacements. + +- **Multi-GPU-process pooling for OSR specifically:** **searches found ZERO projects** spawning multiple CEF/GPU processes to raise OSR concurrency. OBS, Electron, cef-mixer, QCefView all share **one** GPU/Viz process. **So our multi-`cef_host` direction is novel-in-this-corpus** — and the only surveyed approach that actually raises the *aggregate* readback ceiling. + +--- + +## 5. Lessons for our problem; does our fix match prevailing practice? + +**Our diagnosis is correct and triple-confirmed** (Chromium source, Electron README, Vuplex's ~10 number). The ceiling is **aggregate single-GPU-process readback bandwidth** across N per-browser capturers, not a single global constant. + +### 5a. Our two-pronged fix is well-supported — with one important sharpening + +- **Multi-`cef_host` (more GPU processes): VALIDATED but uncommon for OSR.** TouchDesigner (process-group per view) and the cloud fleets (one capture/encode per process/container) are the precedents. **Caveat (TouchDesigner/Malcolm):** extra GPU contexts add context-switch overhead "but not hugely so," and each extra `cef_host` on macOS is a **full GPU+Renderer+Plugin helper-app tree**. → **Use a small bounded pool with a strict per-host capturer budget (~6 sustained, hard-stop ~10); shard only when a host would exceed budget. 
Do NOT spawn one host per tile.** + +- **Take-turns throttle: build it as round-robin EXTERNAL-BEGIN-FRAME pacing, NOT `windowless_frame_rate` tuning.** This is the survey's biggest correction. Under `SendExternalBeginFrame`, **CEF's internal timing is disabled and `windowless_frame_rate` is IGNORED** (cef-mixer + obs-browser docs). Worse, **CefSharp #2675/#2940 prove the accelerated path collapses to ~1 fps WITHOUT a host-driven begin-frame pump** (callback fired ~1–3×/sec even with an empty body). So a throttle that merely lowers `windowless_frame_rate` would be a **no-op** for our IOSurface/OnAcceleratedPaint path. The correct scheduler shape: + - **ONE BeginFrameSource** (Flutter/host composition tick or CVDisplayLink), **fanned out round-robin** to N browsers. + - **Completion-gated per browser:** issue a browser's next BeginFrame **only after its prior frame completed** (`OnAcceleratedPaint`/`OnFrameComplete`). Firing a second `SendExternalBeginFrame` before the prior completes triggers **`Check failed: !pending_frame_callback_. Got overlapping IssueExternalBeginFrame`** — a **GPU-process crash** that, on a shared host, **blanks every tile** (CEF #2800). cef-mixer's unconditional per-tick `SendExternalBeginFrame` is the *anti-pattern* to avoid. + +### 5b. A second, possibly *primary*, cause of our exact symptom — and a hazard in the throttle itself + +The gap deep-dive on **CEF #2483 / #3427 (FrameEvictionManager)** is the closest match to "1–4 of 12 never produce a first frame," and it **changes the recommendation**: + +- OBS/CefSharp engineers hit our **exact** symptom — *"six OSR windows… only four behave normally, other two stop refresh," ">5 browsers," "blank buffer after `WasHidden(false)`"* — and root-caused it to **Chromium's `FrameEvictionManager` evicting compositor frames** for off-screen browsers (an LRU soft cap), **not** to capturer throughput. After eviction, `WasHidden(false)` returns a blank/stale buffer. 
+- **The documented fix is a forced resize with *changed* dims** (a same-size `WasResized()` is a no-op; CEF added a size guard). CefSharp's shipped recipe: resize −1px then restore. `Invalidate`/`NotifyScreenInfoChanged`/begin-frame ticks alone **do not** un-stick an evicted view. +- **Hazard:** a take-turns throttle that **hides off-screen/idle tiles via `WasHidden`** would *manufacture the many-hidden-then-shown pattern that arms eviction.* Per the cross-check against `flutter_cef`'s `cef_host/main.mm`, **`DoSetVisible`'s un-hide path lacks the force-resize kick** (it only sets `WasHidden(!visible)`), while the **resize path already does `WasResized()` + `SendExternalBeginFrame` correctly** — so the un-hide path should copy that pattern *with changed dims*. + +> **Strong recommendation:** before committing to "more processes for more GPU," **instrument whether the blanks correlate with frame eviction** (check `LocalSurfaceId`/frame-id on the blank slots, per the #2483 reporters) **vs. capturer count**. If eviction is the cause, more GPU processes won't help; the cheap, well-precedented fix is **force-resize-on-unhide**. These are not mutually exclusive — land the resize-kick regardless, since it's low-risk and addresses a failure class (eviction blanks on hide→show) that our throttle would otherwise worsen. + +### 5c. Operational guardrails the survey surfaced + +- **Release/copy every frame inside the callback; never hold the IOSurface across frames** — no IOSurface primitive supports safe cross-frame holding; the pool reclaims at callback return. A holding/slow consumer **starves the pool and reproduces blanks independent of GPU saturation** (Electron added a GC-warning for exactly this). +- **macOS sync is by ordering, not exclusion.** There is **no keyed-mutex analog** on macOS (keyed-mutex is the QCefView/Windows-D3D11 proposal; Electron even *removes* the mutex on Windows). 
CEF hands us a **raw IOSurfaceRef with no fence and no mutex.** Safety rests on doing the copy + an explicit **GPU flush / Metal commit inside `OnAcceleratedPaint` before returning**, ordered ahead of CEF's pool recycle. → **Verify flutter_cef issues that flush/commit and doesn't return early; a hitch that delays the copy past recycle yields a torn/blank frame even at low view counts.** We can fence our *own* read but cannot make CEF wait for us — so a "fix the sync on one GPU process" path is **not** available to us on macOS the way it is on Windows. +- **Pin/validate CEF carefully:** the shared-texture path is where concurrency is most fragile across upgrades (OBS 30.2→31 regression; CEF #4057 null handle on 143 release builds; the 250ms animation-region detection bug, chromium 391118566). If our blanks correlate with **animation start**, part of it may be that casting/animation-detection bug — **fixable by a newer Chromium pin rather than adding processes.** + +### 5d. Prevailing practice vs. 
our plan — verdict + +| Lever | Prevailing practice | Our plan | +|---|---|---| +| Cap active capturers | OBS "3-4 sources max" + shutdown-when-hidden; Vuplex accepts ~10 | take-turns throttle (matches) | +| Multi-GPU-process for OSR | **nobody** (TouchDesigner/fleets do per-process-capture, not multi-GPU-for-one-scene) | multi-`cef_host` (**novel; sound; bound the pool**) | +| Throttle mechanism | external begin-frame (cef-mixer) / oracle / CDP ack | **must be external begin-frame, completion-gated — not `windowless_frame_rate`** | +| Avoid the capturer | leave CEF (Gameface/Ultralight/Servo/WPE) | n/a (committed to CEF) | +| Display-amortized capture | cloud fleets | **doesn't fit per-tile textures** | + +**Better idea the survey surfaced:** the combination — **a small bounded pool of `cef_host` processes (each under a ~6-sustained capturer budget) + a single-source, round-robin, completion-gated external-begin-frame scheduler per host + a hardened force-resize-on-unhide path + prioritizing cold-start (first-frame) capturers over steady-state animators when shedding load.** Prioritizing first-frame establishment directly targets our actual failure (late views never get frame #1); the oracle's `kDebouncingPeriodForAnimatedContent=3s` / `kProvingPeriodForAnimatedContent=30s` explains *why* late-joining animators into a saturated GPU get starved, so **admitting views sequentially** (let each establish a steady frame before admitting the next) is a precise counter. + +--- + +## 6. The honest frontier — what nobody appears to solve + +- **No one solves many-simultaneous-animating OSR *inside* the per-browser capturer model.** Every real solution either rations active capturers (OBS/Vuplex), leaves CEF for a non-capturer engine (Gameface/Ultralight/Servo/WPE), or captures one display and HW-encodes (cloud fleets). The accelerated/zero-copy path **does not** escape the capturer. 
+ +- **No public "12 concurrently-animating OSR browsers" benchmark exists.** Our observed ~8–10 ceiling is **novel empirical data** that matches `kDesignLimitMaxFrames`/`kTargetPipelineUtilization` math almost exactly — treat it as the authoritative number to size the throttle and process fan-out. + +- **No multi-GPU-process OSR precedent** — our direction is uncharted; sound, but unvalidated at scale by anyone else. + +- **macOS accelerated OSR is upstream-unsupported and patch-fragile.** No keyed-mutex, no fence handed to the consumer, no upstream test/docs; the GPU-OSR patches can't rebase past Chromium ~103; the maintainer's long-term answer (Ozone, #3263) is Linux-only and **"not currently planned or staffed"** by Google. **We are on the least-trodden platform path.** + +- **No begin-frame-completion signal is exposed to clients on the relevant boundaries** (CEF #4166 maintainer: "I'm not sure there is a reliable signal currently, as this involves multiple asynchronous pipelines in different processes"). First-frame establishment under contention has **no clean upstream primitive** — the field workaround is per-frame marker pixels in JS + resize-kicks. + +- **`FrameEvictionManager` blanks on hide→show with >5–6 browsers (#3427) remain OPEN** with no clean upstream fix — only the resize-kick workaround. This is the failure class our own throttle could *arm*, and it is the single most under-appreciated risk in the planned design. + +--- + +### Files referenced +- `/Users/wenkaifan/Dev/flutter_cef/packages/flutter_cef_macos/native/cef_host/main.mm` — the un-hide path (`DoSetVisible`, missing force-resize kick), the correct resize path (`WasResized()` + `SendExternalBeginFrame`), the begin-frame pump (`PumpBeginFrame`, no in-flight/OnFrameComplete gate), and the load-time self-heal (`OnLoadEnd`→`Invalidate`, `kOpInvalidate`/`DoInvalidate`) are the concrete code sites to harden before adding multi-process/take-turns. 
\ No newline at end of file diff --git a/specs/osr-many-views.md b/specs/osr-many-views.md new file mode 100644 index 0000000..dc5e034 --- /dev/null +++ b/specs/osr-many-views.md @@ -0,0 +1,219 @@ +# OSR with many animating views: why it caps and how to scale + +> **★ SOLVED (2026-06-25). The fix is SOFTWARE-ONLY on ONE shared host — no multi-process, +> no cookie-sync, no engine patch.** The earlier analysis in this doc (§3–§7 below) concluded +> the limit was a steady-state per-GPU-process capture ceiling requiring more processes. That +> was a **measurement artifact**: the stress probe created all tiles *visible at once*. The +> real limit is **concurrent first-frame establishment**, and serializing it fixes everything. +> Sections below are kept as the investigation record; this banner is the current truth. + +## 0. The actual mechanism (current, validated) + +**Root cause.** Each OSR browser, on its *first show*, lazily creates a `viz::FrameSinkVideoCapturer` +and does a one-time **first-frame GPU shared-image allocation** (CEF source: +`RenderWidgetHostViewOSR::ShowWithVisibility` → `CefVideoConsumerOSR`). When N browsers do that +allocation **simultaneously**, the GPU-process allocator races and the losers hit +`FrameSinkVideoCapturerImpl::MaybeCaptureFrame`'s first-frame `Stop()` — **permanent, silent +(LOG(ERROR) only), no `createFailed`, no callback**. That stuck capturer can't be revived +(re-kick / WasHidden / resize / recreate-under-load all fail). **Steady state is fine** — once +established, one GPU process drives 20+ animating views; the bug is purely the concurrent +*establishment*. + +**Fix — serialize establishment (host-side, in `cef_host`):** +1. 
**Create-pacer gated on first PAINT, not bind.** `CefProfileHost` sends one + `CreateBrowser` at a time and doesn't send the next until the previous browser has produced + its first frame (a few frames, or a short settle for static content), with a generous + backstop so one slow page can't block the queue. This keeps concurrent first-frame + allocations at ~1, so the race never happens. +2. **Begin-frame pump always runs** (`PumpBeginFrame`, per slot) = liveness. A blank tile is + then almost always merely *slow* (heavy page / saturated GPU), and paints on its own once + resources free — **patience, not destruction.** +3. **Patient watchdog → bounded recreate.** If a browser produces no frame within a generous + grace (~10s), `cef_host` reports `paintStalled` (a *repeating* signal) and the consumer does + a **bounded** recreate (last resort, capped → never churns). Recreate succeeds because it + too goes through the serial pacer (low contention). + +**Validated:** +- 12 concurrent *animated* tiles → **12/12 establish + animate** (was ~9/12 with permanent blanks). +- 20 *real* websites incl. WebGL/3D/video → **20/20 get content, 0 churn, ~8.5 GB / one shared host**. +- Bulk-open lights up in ~2 s (Chrome "tabs come alive" feel); steady-state is full 60 fps. +- **Patience-only (no recreate) also reached 20/20** — serialization alone prevents the silent + death; the bounded recreate is kept only for the rare genuine `Stop()`. + +**Why this is better than the old plan:** one shared `cef_host` = one profile = **shared +cookies/logins**, no pool, no cookie-sync, no Chromium patch. Per the user's spec it **never +permanently blanks**; under genuine resource pressure it **degrades gracefully** (tiles appear +over a few extra seconds) rather than blanking or churning. + +--- + +## (Investigation record below — superseded by §0) + +## 1. 
TL;DR (original — superseded) + +- **Symptom:** When many off-screen-rendered (OSR) webview tiles *animate at once* on a single shared `cef_host` process, a few of them (~1–4 of 12 in our stress probe) never produce a first frame and stay **blank**. Intermittent. +- **Root cause (one line):** Each OSR browser copies its pixels out through its own `viz::FrameSinkVideoCapturer`, and — empirically — one Chromium GPU/Viz process can only *establish and sustain* a limited number of concurrently-capturing views before late capturers fail to complete their first capture. In our probe that ceiling landed around ~8–10. (This is an observed number, not a documented Chromium constant — see §3.) +- **Decisive tell:** **Static** content renders 12/12 every time; only **continuous animation during establishment** loses tiles. So the bottleneck is establishment-under-concurrent-capture-load, not GPU drawing, GPU memory, or frame area/pixels. +- **Why Chrome doesn't hit this (one line):** Chrome renders windows **on-screen** via a zero-copy CALayer/IOSurface handoff to the macOS WindowServer — there is no per-view video-capture copy-out step. OSR *must* copy each view out of Chromium, and that copy machinery is what caps. +- **The fix (SUPERSEDED — see §0; serialization on one host works):** Spread animating tiles across **more GPU processes** (more `cef_host` processes), sized so each carries **≤ ~6 animating tiles**, so every tile renders at full 60fps. +- **The no-blank guarantee:** A graceful **take-turns throttle** — only ~6 views actively capture at any instant, the rest show their last frame (a freeze, never blank), and tiles come up in waves so each gets a first frame to freeze on. + +--- + +## 2. The simple explanation + +Think of each webview tile as a TV that Chromium is drawing. + +Chrome's normal mode is like **hanging real TVs on a wall**: the operating system's window server is built to juggle dozens of on-screen surfaces at once and composite them for free. 
Adding more TVs is cheap because the OS does the final assembly. + +Our mode (OSR) can't hang TVs on the wall — every tile has to live *inside* the Flutter canvas (draggable, zoomable, clippable, shareable with peers). So instead we point a **screen recorder at each TV** and copy its picture out frame-by-frame, then paint that copy into the canvas. + +One Chromium instance can only run a handful of these screen recorders smoothly at the same time. When too many TVs are all *playing video at once while their recorders are still warming up*, the last few recorders never finish starting — so those tiles stay blank. + +Two important details that fall out of the analogy: + +- A **still picture** is easy: every recorder captures one frame and stops. That's why static content always comes up 12/12. +- The fix isn't a faster recorder — it's **more rooms**: split the TVs across several Chromium processes so no single one is running more than ~6 recorders at once. And as a safety net, **take turns** — let only ~6 record live at a time and freeze the rest on their last frame so nothing ever goes blank. + +--- + +## 3. The technical mechanism + +### OSR delivers pixels via a per-browser `FrameSinkVideoCapturer` + +CEF/Chromium windowless (off-screen) rendering delivers each browser's pixels to the embedder through Chromium's `viz::FrameSinkVideoCapturer`. The software `OnPaint` path performs a CPU readback ("OnPaint has always been sharing the OSR pictures using FrameSinkVideoCapturer, but via CPU"); the hardware `OnAcceleratedPaint` path (reintroduced ~M124/M125, requires `shared_texture_enabled`) hands over a shared GPU texture instead of doing a CPU copy. Both ride the same `FrameSinkVideoCapturer` machinery — the difference is CPU readback vs. GPU shared-texture handoff. Either way, OSR adds a **per-view copy-each-view-out step** that on-screen rendering does not have. + +> Precision note: the literal "video-capture copy" describes the software `OnPaint` path. 
If flutter_cef is on (or moves to) the accelerated `OnAcceleratedPaint` shared-texture path, the per-view step is a shared-texture handoff rather than a CPU copy. It's still an extra per-view step versus on-screen delegated rendering, and it still rides the same per-browser capturer plumbing. + +### The pipeline constants (per capturer) + +From `components/viz/service/frame_sinks/video_capture/frame_sink_video_capturer_impl.h`: + +- `kDesignLimitMaxFrames = 10` — "the maximum number of frames in-flight in the capture pipeline, reflecting the storage capacity dedicated for this purpose." +- `kTargetPipelineUtilization = 0.6f` — "A safe, sustainable maximum number of frames in-flight... exceeding 60% of the design limit is considered 'red line' operation." + +So `10 × 0.6 = ~6` sustainable in-flight frames **per capturer**. The header also notes that in practice only **0–3** frames are typically in flight, depending on content-change rate and system performance. + +**Important scope (read this before quoting any number):** these constants bound in-flight frames **per `FrameSinkVideoCapturer`** (each browser's own frame pool). They are a *per-capturer pipeline depth*, **not** a documented "8–10 capturers per GPU process" cap. No Chromium source states a fixed per-Viz-process capturer limit. The two numbers measure different things — per-capturer pipeline depth (≤ ~6 in-flight, documented) vs. how many capturers one Viz process can establish and sustain at once (~8–10, **empirical, ours**) — so do not present the `6` as proof of the `~8–10`. + +### The ~8–10 concurrent-capture ceiling (empirical) + +The observed ceiling — one GPU/Viz process sustains only ~8–10 *continuously-animating* capturers before late establishers fail — is an **empirical finding from our 12-animating-tiles stress probe**, not a named constant. 
It most likely emerges from the aggregate of per-capturer frame pools, `VideoCaptureOracle` feedback contention, and Viz scheduling, but we did not isolate which dominates. State it as observed behavior, not a hard documented limit, and treat the exact number as probe-specific (hardware, content, and Chromium version dependent). + +The `media::VideoCaptureOracle` (`media/capture/content/video_capture_oracle.cc`) does auto-throttle, but this is a **separate mechanism** from the frame-pool/in-flight limit above — it scales **capture resolution**, not capturer concurrency. It throttles by **capable frame *area*** (pixels per frame), computed as `capture_size.GetArea() / feedback.resource_utilization` and evaluated over time windows (e.g. `kBufferUtilizationEvaluationInterval = 200ms`, `kConsumerCapabilityEvaluationInterval = 1s`). This is consumer-resource-feedback-driven **resolution scaling**. The oracle exposes only enable/disable (`kThrottlingDisabled` / `kThrottlingEnabled` / `kThrottlingActive`), with no public knob to tune the throttle math; it is self-adjusting by design (the code provides no configuration surface for the math, rather than an explicit "do not configure" assertion). (We earlier described the metric as "capable pixels per second" — the current code expresses it as a per-frame *area*, so prefer that wording.) + +### The static-vs-animated tell + +**Static content (paint-once-then-idle) renders 12/12 every time; only continuous animation during establishment loses tiles.** This pinpoints the bottleneck as **establishment under concurrent capture load**, and rules out: + +- GPU drawing — Viz produced ~540 accelerated fps for the tiles that did come up. +- GPU memory — the static case allocated all 12 surfaces fine. +- Frame area / pixels — smaller tiles (140px, 80px) did not help (see §4). + +### On-screen delegated rendering vs. 
OSR copy-out + +There is exactly one Viz process for all of Chromium ("There is usually only one GPU and screen to draw to"); it aggregates compositing from every renderer plus the browser process into a single compositor frame. + +For **on-screen** macOS windows, the GPU process renders web content into an IOSurface-backed texture exposed via a `CAContext`, and hands it to the browser process **by CAContext ID**; the browser wraps it in a `CALayer` "which will make the frame appear on the screen." The macOS Render Server (WindowServer) then composites all active CAContexts into the final image, owning positioning, ordering, and clipping. This is a **zero-copy layer handoff** with **no per-view capture step** — the OS natively juggles many windows. + +For **OSR**, because tiles must live inside the Flutter canvas (not as OS windows), each view's pixels must instead be copied out via its `FrameSinkVideoCapturer` (`CefVideoConsumerOSR::OnFrameCaptured` receives the captured frame on the CEF side). The Chromium *drawing* is identical to Chrome's; the cap comes entirely from the **extra copy-each-view-out step** that on-screen delegated rendering doesn't have. + +*(Sources cited inline above; full list in §8.)* + +--- + +## 4. What we tried and ruled out + +All levers were measured on the same 12-animating-tiles-on-one-host stress probe. Metric: how many tiles produce a first accelerated frame (per-slot diagnostic counters). 
**Don't re-run these — they're refuted.** + +| Lever tried | Result | Takeaway | +| --- | --- | --- | +| Begin-frame rate throttle (slow everyone during startup) | **Worse** — slower → fewer establish | Slowing down hurts establishment | +| Coordinated single-vsync pump (all begin-frames in phase, one source) | No change (~10) | Phase alignment irrelevant | +| Bounded-concurrency round-robin (cap N producing per tick, rotate active window) | ~11, one straggler persists; N=6/5/4 all plateau ~10–11 | Lowering N doesn't break the ceiling | +| Establishment-priority (un-painted tiles get first claim on the budget) | Still ~11 | Priority doesn't help the last tile | +| Capture resolution / smaller tiles (140px, 80px vs full grid) | **No effect** | Refutes the frame-area / pixels theory | +| `--force-gpu-mem-available-mb` (1024 / 2048 / 4096) | No effect (Apple-Silicon unified memory) | Not a GPU-memory cap | +| `--disable-gpu-watchdog` | No effect | Not the watchdog | +| `WasHidden(true)` → `WasHidden(false)` re-establish recovery | Fired ~16×, no effect | Hide/show doesn't recover | +| Gradual create (1.5s spacing between tiles) | No effect (~10) | Not a create-burst race | +| Recreate-on-stall self-heal (watchdog → dispose + recreate stalled tile) | Does **not** converge — fresh browser hits the same wall (10/12) | The wall is per-host, not per-tile | + +**Conclusion:** nothing reachable from `cef_host` or the consumer broke the ~8–10 ceiling in our probe. We attribute it to CEF/Chromium's OSR capture pipeline (per-capturer pools + oracle feedback + Viz scheduling) rather than to anything in our wiring — though we did not pinpoint the single internal cause. + +--- + +## 5. The separate Flutter-pull bug (DIFFERENT issue, fixable) + +This is a **distinct bug** from the capture ceiling and is almost certainly **masked in real Campus** — call it out separately so it isn't conflated. 
+ +**Symptom (in the bare flutter_cef example):** rendered tiles looked *static* even though `cef_host` was producing 60fps. + +**Cause:** Flutter was not **pulling** the produced frames. `textureFrameAvailable` did not wake an idle Flutter, so `copyPixelBuffer` only fired on the probe's 2-second `setState` timer (~840 frames presented per tile vs. ~7 actually pulled). Forcing a Flutter repaint every frame made them animate. + +**Why it's masked in Campus:** Campus's canvas is essentially always animating, so Flutter never sleeps and keeps pulling frames. The bug surfaces only in standalone/idle consumers. + +**Fix (worth doing in the plugin):** drive a Flutter frame per present when `textureFrameAvailable` fires, so standalone flutter_cef consumers animate without an always-on canvas. + +--- + +## 6. The cookie / shared-login tension + +Scaling by "use more processes" collides with shared login: + +- **Shared cookies require ONE Chromium instance.** The profile (user-data) directory is guarded by Chromium's `ProcessSingleton` — on POSIX a symlink-based advisory `SingletonLock` whose target is `<hostname>-<pid>` (`process_singleton_posix.cc`); a stale lock yields the familiar "profile appears to be in use by another process" error. CEF inherits this: each CEF instance needs its own `cache_path` / `root_cache_path` or it conflicts. So **one process per profile dir.** +- **The cookie store belongs to that one instance.** `CookieMonster` (`net/cookies`) is wrapped by `CookieManager` in `//services/network`, owned by the `NetworkContext` and reached via `StoragePartition::GetCookieManagerForBrowserProcess()`, backed by `SQLitePersistentCookieStore`. Cookies are isolated per `NetworkContext`; the docs describe no cross-instance sharing path. (Per the CookieMonster design doc, `CookieMonster` is *not* a singleton — one process can hold several instances: standard, incognito, extensions. 
So it's "one per `NetworkContext`," not literally "one per process" — but none are shared across separate Chromium processes.) +- **CEF gives one self-contained Chromium per `CefInitialize`** ("CEF can only be initialized once per process"), with its own network/cookie service, and exposes **no API to share** one network/cookie service across instances. Cross-instance cookie movement must be done by replication via `CefCookieManager` (`VisitAllCookies` + `SetCookie`). + +**So the tension is:** ONE host = shared login but ~8–10 captures; MORE hosts (more GPU processes) = more captures but **separate cookie jars**. + +**DBSC note.** Device Bound Session Credentials (DBSC) binds session cookies to a hardware-held private key (TPM on Windows; Secure Enclave intended for macOS). It reached **general availability on Windows only** (Chrome 146, ~Apr–May 2026); **macOS support is "coming in an upcoming release" — not GA on macOS as of June 2026.** DBSC is a Chrome-browser/runtime feature, not a web-content capability. We have **no source confirming or denying** that CEF (or the Alloy-style browser path) implements DBSC; the reasonable inference — given DBSC is a Chrome-runtime feature that CEF's identity surface tends to lag — is that CEF currently yields **plain, syncable cookies** for replication, but treat that as an inference, not a sourced fact. Either way DBSC is **not a blocker** for the cookie-sync approach: if CEF doesn't implement it, cookies stay plain; if it eventually does, replication would need to carry the bound credential, but that path doesn't exist on macOS today. (Terminology: in current CEF "Alloy" refers to a window *style* within the one Chrome-bootstrap runtime, not a separate runtime — the Alloy bootstrap was deprecated M125 / removed M128.) + +--- + +## 7. 
The fix + +**Core idea:** more GPU processes = more `cef_host` processes, each sized so it carries **≤ ~6 animating tiles** (safely under the observed ~8–10 ceiling) → every tile renders at full 60fps. (The `6` here is the per-host *animating-tile budget* we chose as a safe margin under the empirical ceiling — not the per-capturer in-flight-frame constant from §3. They happen to share a number; they are not the same thing.) + +### Shape A — Partition-by-profile (PREFERRED) + +Each profile already gets its own `cef_host` = its own GPU/Viz process, so load spreads **for free**, and cookies stay shared **within** a profile. Needs no cookie-sync. Works **unless more than ~6 animating tiles must share ONE login at once.** + +### Shape B — Pool + cookie-sync (GENERAL) + +Bucket a single profile across N hosts and replicate cookies between them via `CefCookieManager` (`VisitAllCookies` + `SetCookie`). Handles >6 animating tiles of one login. More complex (cookie replication, consistency, race handling). + +### The graceful no-blank throttle (the guarantee) + +As a safety net for when a single host *does* exceed its safe count, add a **take-turns** throttle: + +- Only ~6 webviews **actively capture** at any instant; the rest show their **last frame** — a freeze-frame, **never blank**, because the Flutter texture retains the last painted picture. +- **Rotate** which tiles are live. +- **Bring tiles up in waves of ~6**, so each one gets a first frame to freeze on. + +**Steady-state guarantee:** every tile shows live-or-frozen, never blank. The only costs are a brief per-tile "loading" before its establishment wave, and reduced fps while more than ~6 animate at once. **Below ~6 animating per host the throttle is inactive** (full 60fps). + +### Open decision question + +**Do more than ~6 *animating* tiles ever need to share one login simultaneously?** + +- If **no** → Partition-by-profile (Shape A) alone is sufficient; ship the throttle as a guarantee for edge cases. 
+- If **yes** → Pool + cookie-sync (Shape B) is required for that login bucket, plus the throttle. + +--- + +## 8. Sources + +- Chromium — `frame_sink_video_capturer_impl.h` (`kDesignLimitMaxFrames = 10`, `kTargetPipelineUtilization = 0.6f`): https://chromium.googlesource.com/chromium/src/+/7292bb3e6a1e6cd89d41aa5f52ecdbf030ba4191/components/viz/service/frame_sinks/video_capture/frame_sink_video_capturer_impl.h +- Chromium — `video_capture_oracle.cc` (capable frame area, throttling states/intervals): https://chromium.googlesource.com/chromium/src/media/+/refs/heads/main/capture/content/video_capture_oracle.cc +- Chromium — RenderingNG architecture (one Viz process): https://developer.chrome.com/docs/chromium/renderingng-architecture +- Chromium — Mac delegated rendering (CAContext / IOSurface / CALayer handoff): https://www.chromium.org/developers/design-documents/chromium-graphics/mac-delegated-rendering/ +- Chromium — CookieMonster design doc (CookieMonster is not a singleton): https://www.chromium.org/developers/design-documents/network-stack/cookiemonster/ +- Chromium — `net/cookies` README: https://chromium.googlesource.com/chromium/src/+/HEAD/net/cookies/README.md +- Chromium — `process_singleton.h`: https://chromium.googlesource.com/chromium/src/+/HEAD/chrome/browser/process_singleton.h +- Chromium — `process_singleton_posix.cc` (SingletonLock): https://chromium.googlesource.com/chromium/src/+/HEAD/chrome/browser/process_singleton_posix.cc +- CEF — `CefCookieManager` docs (VisitAllCookies / SetCookie): https://cef-builds.spotifycdn.com/docs/121.3/classCefCookieManager.html +- CEF — issue #3685 (per-instance cache directory): https://github.com/chromiumembedded/cef/issues/3685 +- CEF — issues #3730 / #4057 and CEF forum t=19401 (OSR capturer / FrameSinkVideoCapturer discussion): https://github.com/chromiumembedded/cef/issues/3730 · https://github.com/chromiumembedded/cef/issues/4057 · https://magpcss.org/ceforum/viewtopic.php?f=10&t=19401 +- Electron — PR 
#42953 / issue #41972 (OnAcceleratedPaint / shared-texture OSR): https://github.com/electron/electron/pull/42953 · https://github.com/electron/electron/issues/41972 +- DBSC — Chrome docs: https://developer.chrome.com/docs/web-platform/device-bound-session-credentials · Windows GA announcement: https://workspaceupdates.googleblog.com/2026/05/prevent-account-takeovers-with-DBSC-now-generally-available-in-the-Chrome-browser-for-Windows.html · spec: https://github.com/w3c/webappsec-dbsc \ No newline at end of file diff --git a/test/cef_web_view_test.dart b/test/cef_web_view_test.dart index ca0a73d..9b3d8e1 100644 --- a/test/cef_web_view_test.dart +++ b/test/cef_web_view_test.dart @@ -78,6 +78,44 @@ void main() { expect(args['height'], 300); }); + testWidgets('renderScale overrides the device-pixel-ratio in create', + (tester) async { + await tester.pumpWidget( + boxed(const CefWebView(url: 'about:blank', renderScale: 2.5))); + await tester.pumpAndSettle(); + final args = (callsTo('create').single.arguments as Map); + expect(args['dpr'], 2.5); + }); + + testWidgets('changing renderScale alone re-resizes at the new dpr', + (tester) async { + // Regression: the widget used to resize only on SIZE change, so a canvas zoom + // (which changes effective dpr via an ancestor transform, not the laid-out size) + // never re-rendered → blurry. It must now resize when dpr changes at fixed size. 
+ const key = ValueKey('v'); + await tester.pumpWidget(boxed( + const CefWebView(key: key, url: 'about:blank', renderScale: 1.0))); + await tester.pumpAndSettle(); + await tester.pumpWidget(boxed( + const CefWebView(key: key, url: 'about:blank', renderScale: 3.0))); + await tester.pumpAndSettle(); + final resizes = callsTo('resize'); + expect(resizes, isNotEmpty, + reason: 'a dpr-only change must trigger a resize'); + final args = (resizes.last.arguments as Map); + expect(args['dpr'], 3.0); + expect(args['width'], 320, reason: 'same logical size'); + expect(args['height'], 240); + }); + + testWidgets('renderScale is clamped to the native ceiling (<= 8)', + (tester) async { + await tester.pumpWidget( + boxed(const CefWebView(url: 'about:blank', renderScale: 99))); + await tester.pumpAndSettle(); + expect((callsTo('create').single.arguments as Map)['dpr'], 8.0); + }); + testWidgets('navigates when the url property changes', (tester) async { const key = ValueKey('v'); await tester diff --git a/test/perf_sample.sh b/test/perf_sample.sh new file mode 100755 index 0000000..4c318ff --- /dev/null +++ b/test/perf_sample.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# +# Sample cef_host process count + total RSS + total %CPU + the host-app fd count while the +# stress probe (example/lib/stress_probe.dart) runs. Pair the CSV here with the +# CEF_STRESS frame-timing rows the probe writes to /tmp/cef_stress.jsonl. 
+# +# Usage: ./test/perf_sample.sh [seconds] [interval] +# +SECS="${1:-30}"; IV="${2:-2}" +echo "t,cef_procs,cef_rss_mb,cef_cpu,app_fds" +for ((t=0; t<=SECS; t+=IV)); do + pids=$(pgrep -f cef_host 2>/dev/null | tr '\n' ',' | sed 's/,$//') + procs=$(printf '%s' "$pids" | awk -F, '{print ($1==""?0:NF)}') + if [ -n "$pids" ]; then + rss=$(ps -o rss= -p "$pids" 2>/dev/null | awk '{s+=$1} END{printf "%.0f", s/1024}') + cpu=$(ps -o %cpu= -p "$pids" 2>/dev/null | awk '{s+=$1} END{printf "%.0f", s}') + else + rss=0; cpu=0 + fi + app=$(pgrep -f flutter_cef_example 2>/dev/null | head -1) + fds=$([ -n "$app" ] && lsof -p "$app" 2>/dev/null | wc -l | tr -d ' ' || echo 0) + echo "$t,${procs:-0},${rss:-0},${cpu:-0},${fds:-0}" + sleep "$IV" +done diff --git a/test/run_cascade_probe.sh b/test/run_cascade_probe.sh new file mode 100755 index 0000000..e038f88 --- /dev/null +++ b/test/run_cascade_probe.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# +# flutter_cef cascade / never-blank probe — REAL cef_host, asserting. +# +# WHY THIS EXISTS: the never-blank guarantee (serialized establishment via the +# paint-gated create-pacer + sliding window K + bounded recreate) and the cascade +# speed are GPU/host behaviors the mocked Dart tests can't exercise. This launches +# the stress probe against a REAL cef_host with N concurrently-created animating +# tiles and asserts EVERY tile reaches a first accelerated frame (paints>0) — i.e. +# none stays permanently blank — and reports the establishment cascade time. +# Run it before bumping a consumer's pin / merging pacer or establishment changes. 
+# +# Usage: +# ./test/run_cascade_probe.sh # N=12 tiles, window=3 (defaults) +# CEF_N=20 CEF_WINDOW=3 ./test/run_cascade_probe.sh +# +# Env: +# FLUTTER flutter binary (default: `flutter` on PATH) +# FLUTTER_CEF_HOST cef_host binary (default: build/cef_host/cef_host.app/Contents/MacOS/cef_host, built if absent) +# CEF_N tiles created at once (default 12) +# CEF_WINDOW FLUTTER_CEF_ESTAB_WINDOW establishment concurrency (default 3) +# CEF_SECS run seconds before asserting (default 30 — room to self-heal) +set -uo pipefail +cd "$(dirname "$0")/.." +ROOT="$PWD" +FLUTTER="${FLUTTER:-flutter}" +N="${CEF_N:-12}" +WINDOW="${CEF_WINDOW:-3}" +SECS="${CEF_SECS:-30}" +APP="$ROOT/example/build/macos/Build/Products/Debug/flutter_cef_example.app/Contents/MacOS/flutter_cef_example" + +HOST="${FLUTTER_CEF_HOST:-}" +if [ -z "$HOST" ]; then + HOST="$ROOT/build/cef_host/cef_host.app/Contents/MacOS/cef_host" + if [ ! -x "$HOST" ]; then + echo ">> building ad-hoc cef_host (needs cmake + ninja)…" + ( cd packages/flutter_cef_macos && CEF_HOST_ADHOC=ON ./native/build_cef_host.sh "$ROOT/build/cef_host" ) || { + echo "!! cef_host build failed — set FLUTTER_CEF_HOST to a prebuilt binary"; exit 2; } + fi +fi +echo ">> cef_host: $HOST N=$N window=$WINDOW" + +echo ">> building stress probe…" +( cd example && "$FLUTTER" build macos --debug \ + --dart-define=CEF_POOL=1 --dart-define=CEF_INITIAL="$N" \ + --dart-define=CEF_RECREATE_ON_STALL=true \ + -t lib/stress_probe.dart ) || { echo "!! example build failed"; exit 2; } + +LOG="/tmp/cef_cascade_$$.log"; : > "$LOG" +pkill -9 -f flutter_cef_example 2>/dev/null; pkill -9 -f "MacOS/cef_host" 2>/dev/null; sleep 1 +# Ad-hoc host downgrades named profiles to ephemeral unless allowed — the probe +# uses a shared named profile (CEF_POOL=1), so keep it on the real shared host. +FLUTTER_CEF_DEBUG=1 FLUTTER_CEF_ALLOW_INSECURE_PROFILE=1 \ + FLUTTER_CEF_ESTAB_WINDOW="$WINDOW" FLUTTER_CEF_HOST="$HOST" \ + nohup "$APP" > "$LOG" 2>&1 & +APP_PID=$! 
+for _ in $(seq 1 "$SECS"); do sleep 1; done +pkill -9 -f flutter_cef_example 2>/dev/null; pkill -9 -f "MacOS/cef_host" 2>/dev/null + +# Count distinct browsers (keyed by their wire= id in the log) that reached a first accelerated frame (paints>0). +EST=$(python3 - "$LOG" <<'PY' +import re, sys +seen = set() +for line in open(sys.argv[1], errors="replace"): + m = re.search(r"wire=(\d+) pumpTicks=\d+ paints=(\d+)", line) + if m and int(m.group(2)) > 0: + seen.add(m.group(1)) +print(len(seen)) +PY +) +EST="${EST:-0}" +echo ">> established $EST / $N (log: $LOG)" +if [ "$EST" -lt "$N" ]; then + echo "!! FAIL: $((N-EST)) tile(s) never produced a first frame (permanent blank)" + exit 1 +fi +echo ">> PASS: every tile rendered"