fix: handle out of order recording errors in skia graphite (#49609)

Refs https://issues.chromium.org/issues/458722690
This commit is contained in:
Robo
2026-02-02 15:33:05 +09:00
committed by GitHub
parent ed8493c5b1
commit 3d4ce3d39b
5 changed files with 406 additions and 1 deletions

View File

@@ -144,3 +144,4 @@ fix_check_for_file_existence_before_setting_mtime.patch
viz_create_isbufferqueuesupportedandenabled.patch
viz_fix_visual_artifacts_while_resizing_window_with_dcomp.patch
fix_os_crypt_async_cookie_encryption.patch
graphite_handle_out_of_order_recording_errors.patch

View File

@@ -0,0 +1,348 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sunny Sachanandani <sunnyps@chromium.org>
Date: Fri, 30 Jan 2026 12:51:05 -0800
Subject: [graphite] Handle out of order recording errors
Explicitly handle out of order recording errors to crash just like async
shader compile failed errors. Also, emit the insert status to an UMA
histogram at 1% subsampling and log all error statuses at runtime.
Bug: 458722690
Change-Id: Id94657be6ae870dcf8adba71aff216386a6a6964
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/7533855
Commit-Queue: Sunny Sachanandani <sunnyps@chromium.org>
Auto-Submit: Sunny Sachanandani <sunnyps@chromium.org>
Reviewed-by: Jonathan Ross <jonross@chromium.org>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Cr-Commit-Position: refs/heads/main@{#1577487}
diff --git a/gpu/command_buffer/service/graphite_shared_context.cc b/gpu/command_buffer/service/graphite_shared_context.cc
index f9a47ac16bddc2352d81f43629d8ce8c2d2b3b99..9856d68f893e4329fda5d002835613161b346bc1 100644
--- a/gpu/command_buffer/service/graphite_shared_context.cc
+++ b/gpu/command_buffer/service/graphite_shared_context.cc
@@ -4,7 +4,10 @@
#include "gpu/command_buffer/service/graphite_shared_context.h"
+#include "base/logging.h"
#include "base/memory/ptr_util.h"
+#include "base/metrics/histogram_macros.h"
+#include "base/rand_util.h"
#include "base/task/single_thread_task_runner.h"
#include "gpu/command_buffer/common/shm_count.h"
#include "third_party/skia/include/core/SkColorSpace.h"
@@ -14,6 +17,44 @@
namespace gpu {
namespace {
+// This is emitted to UMA - values should not be reordered, only appended!
+// LINT.IfChange(InsertRecordingStatusUma)
+enum class InsertRecordingStatusUma {
+ kSuccess,
+ kInvalidRecording,
+ kPromiseImageInstantiationFailed,
+ kAddCommandsFailed,
+ kAsyncShaderCompilesFailed,
+ kOutOfOrderRecording,
+ kMaxValue = kOutOfOrderRecording
+};
+// LINT.ThenChange(//tools/metrics/histograms/metadata/gpu/enums.xml:GraphiteInsertRecordingStatus)
+
+InsertRecordingStatusUma InsertRecordingStatusUma(
+ skgpu::graphite::InsertStatus insert_status) {
+ // InsertStatus almost behaves like an enum class, but not quite since it can
+ // convert to both bool and integer types and can't be used in a switch.
+ if (insert_status == skgpu::graphite::InsertStatus::kSuccess) {
+ return InsertRecordingStatusUma::kSuccess;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kInvalidRecording) {
+ return InsertRecordingStatusUma::kInvalidRecording;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kPromiseImageInstantiationFailed) {
+ return InsertRecordingStatusUma::kPromiseImageInstantiationFailed;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kAddCommandsFailed) {
+ return InsertRecordingStatusUma::kAddCommandsFailed;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kAsyncShaderCompilesFailed) {
+ return InsertRecordingStatusUma::kAsyncShaderCompilesFailed;
+ } else if (insert_status ==
+ skgpu::graphite::InsertStatus::kOutOfOrderRecording) {
+ return InsertRecordingStatusUma::kOutOfOrderRecording;
+ }
+ NOTREACHED();
+}
+
struct RecordingContext {
skgpu::graphite::GpuFinishedProc old_finished_proc;
skgpu::graphite::GpuFinishedContext old_context;
@@ -216,20 +257,38 @@ bool GraphiteSharedContext::InsertRecordingImpl(
auto insert_status = graphite_context_->insertRecording(*info_ptr);
- // TODO(433845560): Check the kAddCommandsFailed failures.
- // Crash only if we're not simulating a failure for testing.
const bool simulating_insert_failure =
info_ptr->fSimulatedStatus != skgpu::graphite::InsertStatus::kSuccess;
- // InsertStatus::kAsyncShaderCompilesFailed is also an unrecoverable error for
- // which we should also clear the disk shader cache in case the error was due
- // to a corrupted cached shader blob.
+ // Crash, log, or emit UMA only if we're not simulating a failure for testing.
+ if (!simulating_insert_failure) {
+ if (base::ShouldRecordSubsampledMetric(0.01)) {
+ UMA_HISTOGRAM_ENUMERATION("GPU.Graphite.InsertRecordingStatus",
+ InsertRecordingStatusUma(insert_status));
+ }
+ if (insert_status != skgpu::graphite::InsertStatus::kSuccess) {
+ // skgpu::graphite::InsertStatus almost behaves like an enum class, but
+ // not quite - it can't be static_cast to an int.
+ LOG(ERROR) << "Graphite insertRecording failed with status "
+ << static_cast<int>(InsertRecordingStatusUma(insert_status));
+ }
+ }
+
+ // kAsyncShaderCompilesFailed and kOutOfOrderRecording are unrecoverable
+ // failures because they cause future recordings to be rendered incorrectly.
+ // TODO(433845560): Check the kAddCommandsFailed failures.
+ const bool is_unrecoverable_failure =
+ insert_status ==
+ skgpu::graphite::InsertStatus::kAsyncShaderCompilesFailed ||
+ insert_status == skgpu::graphite::InsertStatus::kOutOfOrderRecording;
+ // For kAsyncShaderCompilesFailed, we should also clear the disk shader
+ // cache in case the error was due to a corrupted cached shader blob.
+ std::optional<GpuProcessShmCount::ScopedIncrement> use_shader_cache;
if (insert_status ==
skgpu::graphite::InsertStatus::kAsyncShaderCompilesFailed) {
- GpuProcessShmCount::ScopedIncrement use_shader_cache(
- use_shader_cache_shm_count_);
- CHECK(simulating_insert_failure);
+ use_shader_cache.emplace(use_shader_cache_shm_count_);
}
+ CHECK(simulating_insert_failure || !is_unrecoverable_failure);
// All other failure modes are recoverable in the sense that future recordings
// will be rendered correctly, so merely return a boolean here so that callers
diff --git a/gpu/command_buffer/service/graphite_shared_context_unittest.cc b/gpu/command_buffer/service/graphite_shared_context_unittest.cc
index 0de107a9d86ff8217a4c537598f198f313ecf9df..852343ab6804efed962b707b748ddf1920c20ea7 100644
--- a/gpu/command_buffer/service/graphite_shared_context_unittest.cc
+++ b/gpu/command_buffer/service/graphite_shared_context_unittest.cc
@@ -6,6 +6,7 @@
#include "base/threading/thread.h"
#include "gpu/command_buffer/common/shm_count.h"
+#include "gpu/command_buffer/service/skia_utils.h"
#include "skia/buildflags.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -86,8 +87,7 @@ class GraphiteSharedContextTest : public testing::TestWithParam<bool> {
wgpu::DeviceDescriptor device_desc = {};
- wgpu::Device device =
- wgpu::Adapter(adapters[0].Get()).CreateDevice(&device_desc);
+ auto device = wgpu::Adapter(adapters[0].Get()).CreateDevice(&device_desc);
CHECK(device);
skgpu::graphite::DawnBackendContext backend_context = {};
@@ -95,7 +95,11 @@ class GraphiteSharedContextTest : public testing::TestWithParam<bool> {
backend_context.fDevice = device;
backend_context.fQueue = device.GetQueue();
- skgpu::graphite::ContextOptions context_options = {};
+ // Use the default Graphite context options that Chromium uses e.g. disallow
+ // things like out of order recordings.
+ gpu::GpuDriverBugWorkarounds workarounds;
+ auto context_options = GetDefaultGraphiteContextOptions(workarounds);
+
graphite_shared_context_ = std::make_unique<GraphiteSharedContext>(
skgpu::graphite::ContextFactory::MakeDawn(backend_context,
context_options),
@@ -133,14 +137,12 @@ TEST_P(GraphiteSharedContextTest, ConcurrentAccess) {
auto run_graphite_functions =
[](GraphiteSharedContext* graphite_shared_context) {
// Call a method that acquires the lock
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context->makeRecorder();
+ auto recorder = graphite_shared_context->makeRecorder();
EXPECT_TRUE(recorder);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 10; ++j) {
- std::unique_ptr<skgpu::graphite::Recording> recording =
- recorder->snap();
+ auto recording = recorder->snap();
skgpu::graphite::InsertRecordingInfo info = {};
info.fRecording = recording.get();
EXPECT_TRUE(recording);
@@ -172,18 +174,17 @@ TEST_P(GraphiteSharedContextTest, ConcurrentAccess) {
}
TEST_P(GraphiteSharedContextTest, AsyncShaderCompilesFailed) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
auto ii = SkImageInfo::Make(64, 64, kN32_SkColorType, kPremul_SkAlphaType);
- sk_sp<SkSurface> surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface1->getCanvas()->clear(SK_ColorRED);
- sk_sp<SkSurface> surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface2->getCanvas()->drawImage(surface1->makeTemporaryImage(), 0, 0);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
+ auto recording = recorder->snap();
EXPECT_TRUE(recording);
skgpu::graphite::InsertRecordingInfo info = {};
@@ -197,19 +198,43 @@ TEST_P(GraphiteSharedContextTest, AsyncShaderCompilesFailed) {
EXPECT_FALSE(graphite_shared_context_->insertRecording(info));
}
+TEST_P(GraphiteSharedContextTest, OutOfOrderRecording) {
+ auto recorder = graphite_shared_context_->makeRecorder();
+ EXPECT_TRUE(recorder);
+
+ auto ii = SkImageInfo::Make(64, 64, kN32_SkColorType, kPremul_SkAlphaType);
+ auto surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ surface1->getCanvas()->clear(SK_ColorRED);
+ auto recording1 = recorder->snap();
+ EXPECT_TRUE(recording1);
+
+ auto surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ surface2->getCanvas()->drawImage(surface1->makeTemporaryImage(), 0, 0);
+ auto recording2 = recorder->snap();
+ EXPECT_TRUE(recording2);
+
+ skgpu::graphite::InsertRecordingInfo info = {};
+
+ info.fRecording = recording2.get();
+ graphite_shared_context_->insertRecording(info);
+
+ info.fRecording = recording1.get();
+ EXPECT_DEATH_IF_SUPPORTED(graphite_shared_context_->insertRecording(info),
+ "");
+}
+
TEST_P(GraphiteSharedContextTest, AddCommandsFailed) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
auto ii = SkImageInfo::Make(64, 64, kN32_SkColorType, kPremul_SkAlphaType);
- sk_sp<SkSurface> surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface1 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface1->getCanvas()->clear(SK_ColorRED);
- sk_sp<SkSurface> surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
+ auto surface2 = SkSurfaces::RenderTarget(recorder.get(), ii);
surface2->getCanvas()->drawImage(surface1->makeTemporaryImage(), 0, 0);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
+ auto recording = recorder->snap();
EXPECT_TRUE(recording);
skgpu::graphite::InsertRecordingInfo info = {};
@@ -223,39 +248,37 @@ TEST_P(GraphiteSharedContextTest, AddCommandsFailed) {
}
TEST_P(GraphiteSharedContextTest, LowPendingRecordings) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
- EXPECT_TRUE(recording);
-
- skgpu::graphite::InsertRecordingInfo info = {};
- info.fRecording = recording.get();
-
// No flush is expected if the number of pending recordings is low.
EXPECT_CALL(backend_flush_callback_, Flush()).Times(0);
for (size_t i = 0; i < kMaxPendingRecordings - 1; ++i) {
+ auto recording = recorder->snap();
+ EXPECT_TRUE(recording);
+
+ skgpu::graphite::InsertRecordingInfo info = {};
+ info.fRecording = recording.get();
+
EXPECT_TRUE(graphite_shared_context_->insertRecording(info));
}
}
TEST_P(GraphiteSharedContextTest, MaxPendingRecordings) {
- std::unique_ptr<skgpu::graphite::Recorder> recorder =
- graphite_shared_context_->makeRecorder();
+ auto recorder = graphite_shared_context_->makeRecorder();
EXPECT_TRUE(recorder);
- std::unique_ptr<skgpu::graphite::Recording> recording = recorder->snap();
- EXPECT_TRUE(recording);
-
- skgpu::graphite::InsertRecordingInfo info = {};
- info.fRecording = recording.get();
-
// Expect a flush when the number of pending recordings reaches the max.
EXPECT_CALL(backend_flush_callback_, Flush()).Times(1);
for (size_t i = 0; i < kMaxPendingRecordings; ++i) {
+ auto recording = recorder->snap();
+ EXPECT_TRUE(recording);
+
+ skgpu::graphite::InsertRecordingInfo info = {};
+ info.fRecording = recording.get();
+
EXPECT_TRUE(graphite_shared_context_->insertRecording(info));
}
}
diff --git a/tools/metrics/histograms/metadata/gpu/enums.xml b/tools/metrics/histograms/metadata/gpu/enums.xml
index 659d7721751a10f24dd1cf21b2b67baee17dfd52..91439cd24cc51a8fd762d4d079c56495965c2087 100644
--- a/tools/metrics/histograms/metadata/gpu/enums.xml
+++ b/tools/metrics/histograms/metadata/gpu/enums.xml
@@ -1112,6 +1112,19 @@ Called by update_gpu_driver_bug_workaround_entries.py.-->
<int value="10" label="NoKillForGpuProgressInCrashDump"/>
</enum>
+<!-- LINT.IfChange(GraphiteInsertRecordingStatus) -->
+
+<enum name="GraphiteInsertRecordingStatus">
+ <int value="0" label="Success"/>
+ <int value="1" label="InvalidRecording"/>
+ <int value="2" label="PromiseImageInstantiationFailed"/>
+ <int value="3" label="AddCommandsFailed"/>
+ <int value="4" label="AsyncShaderCompilesFailed"/>
+ <int value="5" label="OutOfOrderRecording"/>
+</enum>
+
+<!-- LINT.ThenChange(//gpu/command_buffer/service/graphite_shared_context.cc:InsertRecordingStatusUma) -->
+
<enum name="HasDiscreteGpu">
<int value="0" label="No"/>
<int value="1" label="Yes"/>
diff --git a/tools/metrics/histograms/metadata/gpu/histograms.xml b/tools/metrics/histograms/metadata/gpu/histograms.xml
index 9524b4f806478ab8be823f9a18f187e8d8449650..df6916ee3d33cc9f0eb665b7506a9302d1cfeb0b 100644
--- a/tools/metrics/histograms/metadata/gpu/histograms.xml
+++ b/tools/metrics/histograms/metadata/gpu/histograms.xml
@@ -1011,6 +1011,16 @@ chromium-metrics-reviews@google.com.
</summary>
</histogram>
+<histogram name="Gpu.Graphite.InsertRecordingStatus"
+ enum="GraphiteInsertRecordingStatus" expires_after="2026-04-30">
+ <owner>sunnyps@chromium.org</owner>
+ <owner>michaelludwig@google.com</owner>
+ <owner>chrome-gpu-metric-alerts@chromium.org</owner>
+ <summary>
+ The return value for each Graphite insertRecording call made by Chromium.
+ </summary>
+</histogram>
+
<histogram name="GPU.GraphValidation.Duration" units="ms"
expires_after="2025-11-15">
<owner>vasilyt@chromium.org</owner>

View File

@@ -12,5 +12,6 @@
{ "patch_dir": "src/electron/patches/ReactiveObjC", "repo": "src/third_party/squirrel.mac/vendor/ReactiveObjC" },
{ "patch_dir": "src/electron/patches/webrtc", "repo": "src/third_party/webrtc" },
{ "patch_dir": "src/electron/patches/reclient-configs", "repo": "src/third_party/engflow-reclient-configs" },
{ "patch_dir": "src/electron/patches/sqlite", "repo": "src/third_party/sqlite/src" }
{ "patch_dir": "src/electron/patches/sqlite", "repo": "src/third_party/sqlite/src" },
{ "patch_dir": "src/electron/patches/skia", "repo": "src/third_party/skia" }
]

1
patches/skia/.patches Normal file
View File

@@ -0,0 +1 @@
graphite_add_insertstatus_koutoforderrecording.patch

View File

@@ -0,0 +1,54 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Ludwig <michaelludwig@google.com>
Date: Fri, 16 Jan 2026 15:28:27 -0500
Subject: [graphite] Add InsertStatus::kOutOfOrderRecording
Bug: b/458722690
Change-Id: I1c3661b9c765f46f039f0044ae3557a0e7f619cf
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1143598
Auto-Submit: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Thomas Smith <thomsmit@google.com>
Commit-Queue: Nicolette Prevost <nicolettep@google.com>
Reviewed-by: Nicolette Prevost <nicolettep@google.com>
diff --git a/include/gpu/graphite/GraphiteTypes.h b/include/gpu/graphite/GraphiteTypes.h
index 448f0767c6f0ebee58a13a6c508a7bfbf9632ba3..6f56424fe38141e632f7cf1bb2e6a8298cd0db7f 100644
--- a/include/gpu/graphite/GraphiteTypes.h
+++ b/include/gpu/graphite/GraphiteTypes.h
@@ -52,7 +52,12 @@ public:
kAddCommandsFailed,
// Internal failure, shader pipeline compilation failed (driver issue, or disk corruption),
// state unrecoverable.
- kAsyncShaderCompilesFailed
+ kAsyncShaderCompilesFailed,
+ // The inserted Recording is out of order from what the Context expects (when
+ // `[Context|Recorder]Options::fRequireOrderedRecordings` is true), which can either
+ // represent a client synchronization error or an internal failure when a prior dependent
+ // Recording failed for some reason, no CB changes but state likely unrecoverable.
+ kOutOfOrderRecording,
};
constexpr InsertStatus() : fValue(kSuccess) {}
diff --git a/relnotes/out_of_order_status.md b/relnotes/out_of_order_status.md
new file mode 100644
index 0000000000000000000000000000000000000000..74f7c889338925d132a2d80cdba40d321807c614
--- /dev/null
+++ b/relnotes/out_of_order_status.md
@@ -0,0 +1,4 @@
+* Graphite's InsertStatus now has an additional kOutOfOrderRecording to differentiate this
+ unrecoverable error from programming errors that would lead to kInvalidRecording. Out of order
+ recordings can currently arise "naturally" if prior dependent recordings failed due to resource
+ creation or update errors from the GPU driver.
diff --git a/src/gpu/graphite/QueueManager.cpp b/src/gpu/graphite/QueueManager.cpp
index 2948e071390095f3a1a1d7fdb6c7878adeaf9c53..f5f97f23e4957b819f9d92e8f03f49cd6fad1ab3 100644
--- a/src/gpu/graphite/QueueManager.cpp
+++ b/src/gpu/graphite/QueueManager.cpp
@@ -122,7 +122,7 @@ InsertStatus QueueManager::addRecording(const InsertRecordingInfo& info, Context
if (recorderID != SK_InvalidGenID) {
uint32_t* recordingID = fLastAddedRecordingIDs.find(recorderID);
RETURN_FAIL_IF(recordingID && info.fRecording->priv().uniqueID() != *recordingID + 1,
- InsertStatus::kInvalidRecording,
+ InsertStatus::kOutOfOrderRecording,
"Recordings are expected to be replayed in order");
// Note the new Recording ID.