fix: handle out of order recording errors in skia graphite (#49608)

This commit is contained in:
Robo
2026-02-02 22:20:19 +09:00
committed by GitHub
parent 1159de52e7
commit db5606b4b3
2 changed files with 349 additions and 0 deletions

View File

@@ -144,3 +144,4 @@ fix_linux_tray_id.patch
expose_gtk_ui_platform_field.patch
fix_os_crypt_async_cookie_encryption.patch
patch_osr_control_screen_info.patch
graphite_handle_out_of_order_recording_errors.patch

View File

@@ -0,0 +1,348 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sunny Sachanandani <sunnyps@chromium.org>
Date: Fri, 30 Jan 2026 12:51:05 -0800
Subject: [graphite] Handle out of order recording errors
Explicitly handle out of order recording errors to crash just like async
shader compile failed errors. Also, emit the insert status to an UMA
histogram at 1% subsampling and log all error statuses at runtime.
Bug: 458722690
Change-Id: Id94657be6ae870dcf8adba71aff216386a6a6964
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/7533855
Commit-Queue: Sunny Sachanandani <sunnyps@chromium.org>
Auto-Submit: Sunny Sachanandani <sunnyps@chromium.org>
Reviewed-by: Jonathan Ross <jonross@chromium.org>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Cr-Commit-Position: refs/heads/main@{#1577487}
diff --git a/gpu/command_buffer/service/graphite_shared_context.cc b/gpu/command_buffer/service/graphite_shared_context.cc
index f9a47ac16bddc2352d81f43629d8ce8c2d2b3b99..9856d68f893e4329fda5d002835613161b346bc1 100644
--- a/gpu/command_buffer/service/graphite_shared_context.cc
+++ b/gpu/command_buffer/service/graphite_shared_context.cc
@@ -4,7 +4,10 @@
#include "gpu/command_buffer/service/graphite_shared_context.h"
+#include "base/logging.h"
#include "base/memory/ptr_util.h"
+#include "base/metrics/histogram_macros.h"
+#include "base/rand_util.h"
#include "base/task/single_thread_task_runner.h"
#include "gpu/command_buffer/common/shm_count.h"
#include "third_party/skia/include/core/SkColorSpace.h"
@@ -14,6 +17,44 @@
namespace gpu {
namespace {
+// This is emitted to UMA - values should not be reordered, only appended!
+// LINT.IfChange(InsertRecordingStatusUma)
+enum class InsertRecordingStatusUma {
+ kSuccess,
+ kInvalidRecording,
+ kPromiseImageInstantiationFailed,
+ kAddCommandsFailed,
+ kAsyncShaderCompilesFailed,
+ kOutOfOrderRecording,
+ kMaxValue = kOutOfOrderRecording
+};
+// LINT.ThenChange(//tools/metrics/histograms/metadata/gpu/enums.xml:GraphiteInsertRecordingStatus)
+
+InsertRecordingStatusUma InsertRecordingStatusUma(
+ skgpu::graphite::InsertStatus insert_status) {
+ // InsertStatus almost behaves like an enum class, but not quite since it can
+ // convert to both bool and integer types and can't be used in a switch.
+ if (insert_status == skgpu::graphite::InsertStatus::kSuccess) {
+ return InsertRecordingStatusUma::kSuccess;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kInvalidRecording) {
+ return InsertRecordingStatusUma::kInvalidRecording;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kPromiseImageInstantiationFailed) {
+ return InsertRecordingStatusUma::kPromiseImageInstantiationFailed;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kAddCommandsFailed) {
+ return InsertRecordingStatusUma::kAddCommandsFailed;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kAsyncShaderCompilesFailed) {
+ return InsertRecordingStatusUma::kAsyncShaderCompilesFailed;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kOutOfOrderRecording) {
+ return InsertRecordingStatusUma::kOutOfOrderRecording;
+ }
+ NOTREACHED();
+}
+
struct RecordingContext {
skgpu::graphite::GpuFinishedProc old_finished_proc;
skgpu::graphite::GpuFinishedContext old_context;
@@ -216,20 +257,38 @@ bool GraphiteSharedContext::InsertRecordingImpl(
auto insert_status = graphite_context_->insertRecording(*info_ptr);
- // TODO(433845560): Check the kAddCommandsFailed failures.
- // Crash only if we're not simulating a failure for testing.
const bool simulating_insert_failure =
info_ptr->fSimulatedStatus != skgpu::graphite::InsertStatus::kSuccess;
- // InsertStatus::kAsyncShaderCompilesFailed is also an unrecoverable error for
- // which we should also clear the disk shader cache in case the error was due
- // to a corrupted cached shader blob.
+ // Crash, log, or emit UMA only if we're not simulating a failure for testing.
+ if (!simulating_insert_failure) {
+ if (base::ShouldRecordSubsampledMetric(0.01)) {
+ UMA_HISTOGRAM_ENUMERATION("GPU.Graphite.InsertRecordingStatus",
+ InsertRecordingStatusUma(insert_status));
+ }
+ if (insert_status != skgpu::graphite::InsertStatus::kSuccess) {
+ // skgpu::graphite::InsertStatus almost behaves like an enum class, but
+ // not quite - it can't be static_cast to an int.
+ LOG(ERROR) << "Graphite insertRecording failed with status "
+ << static_cast<int>(InsertRecordingStatusUma(insert_status));
+ }
+ }
+
+ // kAsyncShaderCompilesFailed and kOutOfOrderRecording are unrecoverable
+ // failures because they cause future recordings to be rendered incorrectly.
+ // TODO(433845560): Check the kAddCommandsFailed failures.
+ const bool is_unrecoverable_failure =
+ insert_status ==
+ skgpu::graphite::InsertStatus::kAsyncShaderCompilesFailed ||
+ insert_status == skgpu::graphite::InsertStatus::kOutOfOrderRecording;
+ // For kAsyncShaderCompilesFailed, we should also clear the disk shader
+ // cache in case the error was due to a corrupted cached shader blob.
+ std::optional<GpuProcessShmCount::ScopedIncrement> use_shader_cache;
if (insert_status ==
skgpu::graphite::InsertStatus::kAsyncShaderCompilesFailed) {
- GpuProcessShmCount::ScopedIncrement use_shader_cache(
- use_shader_cache_shm_count_);
- CHECK(simulating_insert_failure);
+ use_shader_cache.emplace(use_shader_cache_shm_count_);
}
+ CHECK(simulating_insert_failure || !is_unrecoverable_failure);
// All other failure modes are recoverable in the sense that future recordings
// will be rendered correctly, so merely return a boolean here so that callers
diff --git a/gpu/command_buffer/service/graphite_shared_context_unittest.cc b/gpu/command_buffer/service/graphite_shared_context_unittest.cc
index 0de107a9d86ff8217a4c537598f198f313ecf9df..852343ab6804efed962b707b748ddf1920c20ea7 100644
--- a/gpu/command_buffer/service/graphite_shared_context_unittest.cc
+++ b/gpu/command_buffer/service/graphite_shared_context_unittest.cc
@@ -6,6 +6,7 @@
#include "base/threading/thread.h"
#include "gpu/command_buffer/common/shm_count.h"
+#include "gpu/command_buffer/service/skia_utils.h"
#include "skia/buildflags.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -86,8 +87,7 @@ class GraphiteSharedContextTest : public testing::TestWithParam<bool> {
wgpu::DeviceDescriptor device_desc = {};
- wgpu::Device device =
- wgpu::Adapter(adapters[0].Get()).CreateDevice(&device_desc);
+ auto device = wgpu::Adapter(adapters[0].Get()).CreateDevice(&device_desc);
CHECK(device);
skgpu::graphite::DawnBackendContext backend_context = {};
@@ -95,7 +95,11 @@ class GraphiteSharedContextTest : public testing::TestWithParam<bool> {
backend_context.fDevice = device;
backend_context.fQueue = device.GetQueue();
- skgpu::graphite::ContextOptions context_options = {};
+ // Use the default Graphite context options that Chromium uses e.g. disallow
+ // things like out of order recordings.
+ gpu::GpuDriverBugWorkarounds workarounds;
+ auto context_options = GetDefaultGraphiteContextOptions(workarounds);
+
graphite_shared_context_ = std::make_unique<GraphiteSharedContext>(
skgpu::graphite::ContextFactory::MakeDawn(backend_context,
context_options),
@@ -133,14 +137,12 @@ TEST_P(GraphiteSharedContextTest, ConcurrentAccess) {
auto run_graphite_functions =
[](GraphiteSharedContext* graphite_shared_context) {
// Call a method that acquires the lock
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context->makeRecorder();
+ auto recorder = graphite_shared_context->makeRecorder();
EXPECT_TRUE(recorder);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 10; ++j) {
- std::unique_ptr<skgpu::graphite::Recording> recording =
- recorder->snap();
+ auto recording = recorder->snap();
skgpu::graphite::InsertRecordingInfo info = {};
info.fRecording = recording.get();
EXPECT_TRUE(recording);
@@ -172,18 +174,17 @@ TEST_P(GraphiteSharedContextTest, ConcurrentAccess) {
}
TEST_P(GraphiteSharedContextTest, AsyncShaderCompilesFailed) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
auto ii = SkImageInfo::Make(64, 64, kN32_SkColorType, kPremul_SkAlphaType);
- sk_sp<SkSurface> surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface1->getCanvas()->clear(SK_ColorRED);
- sk_sp<SkSurface> surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface2->getCanvas()->drawImage(surface1->makeTemporaryImage(), 0, 0);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
+ auto recording = recorder->snap();
EXPECT_TRUE(recording);
skgpu::graphite::InsertRecordingInfo info = {};
@@ -197,19 +198,43 @@ TEST_P(GraphiteSharedContextTest, AsyncShaderCompilesFailed) {
EXPECT_FALSE(graphite_shared_context_->insertRecording(info));
}
+TEST_P(GraphiteSharedContextTest, OutOfOrderRecording) {
+ auto recorder = graphite_shared_context_->makeRecorder();
+ EXPECT_TRUE(recorder);
+
+ auto ii = SkImageInfo::Make(64, 64, kN32_SkColorType, kPremul_SkAlphaType);
+ auto surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ surface1->getCanvas()->clear(SK_ColorRED);
+ auto recording1 = recorder->snap();
+ EXPECT_TRUE(recording1);
+
+ auto surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ surface2->getCanvas()->drawImage(surface1->makeTemporaryImage(), 0, 0);
+ auto recording2 = recorder->snap();
+ EXPECT_TRUE(recording2);
+
+ skgpu::graphite::InsertRecordingInfo info = {};
+
+ info.fRecording = recording2.get();
+ graphite_shared_context_->insertRecording(info);
+
+ info.fRecording = recording1.get();
+ EXPECT_DEATH_IF_SUPPORTED(graphite_shared_context_->insertRecording(info),
+ "");
+}
+
TEST_P(GraphiteSharedContextTest, AddCommandsFailed) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
auto ii = SkImageInfo::Make(64, 64, kN32_SkColorType, kPremul_SkAlphaType);
- sk_sp<SkSurface> surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface1->getCanvas()->clear(SK_ColorRED);
- sk_sp<SkSurface> surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface2->getCanvas()->drawImage(surface1->makeTemporaryImage(), 0, 0);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
+ auto recording = recorder->snap();
EXPECT_TRUE(recording);
skgpu::graphite::InsertRecordingInfo info = {};
@@ -223,39 +248,37 @@ TEST_P(GraphiteSharedContextTest, AddCommandsFailed) {
}
TEST_P(GraphiteSharedContextTest, LowPendingRecordings) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
- EXPECT_TRUE(recording);
-
- skgpu::graphite::InsertRecordingInfo info = {};
- info.fRecording = recording.get();
-
// No flush is expected if the number of pending recordings is low.
EXPECT_CALL(backend_flush_callback_, Flush()).Times(0);
for (size_t i = 0; i < kMaxPendingRecordings - 1; ++i) {
+ auto recording = recorder->snap();
+ EXPECT_TRUE(recording);
+
+ skgpu::graphite::InsertRecordingInfo info = {};
+ info.fRecording = recording.get();
+
EXPECT_TRUE(graphite_shared_context_->insertRecording(info));
}
}
TEST_P(GraphiteSharedContextTest, MaxPendingRecordings) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
- EXPECT_TRUE(recording);
-
- skgpu::graphite::InsertRecordingInfo info = {};
- info.fRecording = recording.get();
-
// Expect a flush when the number of pending recordings reaches the max.
EXPECT_CALL(backend_flush_callback_, Flush()).Times(1);
for (size_t i = 0; i < kMaxPendingRecordings; ++i) {
+ auto recording = recorder->snap();
+ EXPECT_TRUE(recording);
+
+ skgpu::graphite::InsertRecordingInfo info = {};
+ info.fRecording = recording.get();
+
EXPECT_TRUE(graphite_shared_context_->insertRecording(info));
}
}
diff --git a/tools/metrics/histograms/metadata/gpu/enums.xml b/tools/metrics/histograms/metadata/gpu/enums.xml
index 2df046e16f9186c340042506eb23a2526e515528..106bbdf71a3a067a7378486caa8423a839465455 100644
--- a/tools/metrics/histograms/metadata/gpu/enums.xml
+++ b/tools/metrics/histograms/metadata/gpu/enums.xml
@@ -1112,6 +1112,19 @@ Called by update_gpu_driver_bug_workaround_entries.py.-->
<int value="10" label="NoKillForGpuProgressInCrashDump"/>
</enum>
+<!-- LINT.IfChange(GraphiteInsertRecordingStatus) -->
+
+<enum name="GraphiteInsertRecordingStatus">
+ <int value="0" label="Success"/>
+ <int value="1" label="InvalidRecording"/>
+ <int value="2" label="PromiseImageInstantiationFailed"/>
+ <int value="3" label="AddCommandsFailed"/>
+ <int value="4" label="AsyncShaderCompilesFailed"/>
+ <int value="5" label="OutOfOrderRecording"/>
+</enum>
+
+<!-- LINT.ThenChange(//gpu/command_buffer/service/graphite_shared_context.cc:InsertRecordingStatusUma) -->
+
<enum name="HasDiscreteGpu">
<int value="0" label="No"/>
<int value="1" label="Yes"/>
diff --git a/tools/metrics/histograms/metadata/gpu/histograms.xml b/tools/metrics/histograms/metadata/gpu/histograms.xml
index 6b28a436fb4be0ba088de243f54134b4b024bfd4..f67c31921c224dcd9e94f07d60f32ec16debaaac 100644
--- a/tools/metrics/histograms/metadata/gpu/histograms.xml
+++ b/tools/metrics/histograms/metadata/gpu/histograms.xml
@@ -1033,6 +1033,16 @@ chromium-metrics-reviews@google.com.
</summary>
</histogram>
+<histogram name="Gpu.Graphite.InsertRecordingStatus"
+ enum="GraphiteInsertRecordingStatus" expires_after="2026-04-30">
+ <owner>sunnyps@chromium.org</owner>
+ <owner>michaelludwig@google.com</owner>
+ <owner>chrome-gpu-metric-alerts@chromium.org</owner>
+ <summary>
+ The return value for each Graphite insertRecording call made by Chromium.
+ </summary>
+</histogram>
+
<histogram name="GPU.GraphValidation.Duration" units="ms"
expires_after="2025-11-15">
<owner>vasilyt@chromium.org</owner>