Merge pull request #2558 from jimblandy/doc-resource-tracking

Document some aspects of resource tracking.
2026-04-22 03:02:01 -04:00 · 2022-03-27 14:11:30 -07:00
parent b52d68d5ad 0d39818b91
commit 4896dee101
4 changed files with 166 additions and 17 deletions
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -81,7 +81,7 @@ impl SuspectedResources {
    }
 }

-/// A struct that keeps lists of resources that are no longer needed.
+/// Raw backend resources that should be freed shortly.
 #[derive(Debug)]
 struct NonReferencedResources<A: hal::Api> {
    buffers: Vec<A::Buffer>,
@@ -189,10 +189,29 @@ impl<A: hal::Api> NonReferencedResources<A> {
    }
 }

+/// Resources used by a queue submission, and work to be done once it completes.
 struct ActiveSubmission<A: hal::Api> {
+    /// The index of the submission we track.
+    ///
+    /// When `Device::fence`'s value is greater than or equal to this, our queue
+    /// submission has completed.
    index: SubmissionIndex,
+
+    /// Resources to be freed once this queue submission has completed.
+    ///
+    /// When the device is polled, for completed submissions,
+    /// `triage_submissions` merges these into
+    /// `LifetimeTracker::free_resources`. From there,
+    /// `LifetimeTracker::cleanup` passes them to the hal to be freed.
+    ///
+    /// This includes things like temporary resources and resources that are
+    /// used by submitted commands but have been dropped by the user (meaning that
+    /// this submission is their last reference).
    last_resources: NonReferencedResources<A>,
+
+    /// Buffers to be mapped once this submission has completed.
    mapped: Vec<id::Valid<id::BufferId>>,
+
    encoders: Vec<EncoderInFlight<A>>,
    work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
 }
@@ -205,31 +224,75 @@ pub enum WaitIdleError {
    StuckGpu,
 }

-/// A struct responsible for tracking resource lifetimes.
+/// Resource tracking for a device.
 ///
-/// Here is how host mapping is handled:
-///   1. When mapping is requested we add the buffer to the life_tracker list of `mapped` buffers.
-///   2. When `triage_suspected` is called, it checks the last submission index associated with each of the mapped buffer,
-/// and register the buffer with either a submission in flight, or straight into `ready_to_map` vector.
-///   3. When `ActiveSubmission` is retired, the mapped buffers associated with it are moved to `ready_to_map` vector.
-///   4. Finally, `handle_mapping` issues all the callbacks.
+/// ## Host mapping buffers
+///
+/// A buffer cannot be mapped until all active queue submissions that use it
+/// have completed. To that end:
+///
+/// -   Each buffer's `LifeGuard::submission_index` records the index of the
+///     most recent queue submission that uses that buffer.
+///
+/// -   Calling `map_async` adds the buffer to `self.mapped`, and changes
+///     `Buffer::map_state` to prevent it from being used in any new
+///     submissions.
+///
+/// -   When the device is polled, the following `LifetimeTracker` methods decide
+///     what should happen next:
+///
+///     1)  `triage_mapped` drains `self.mapped`, checking the submission index
+///         of each buffer against the queue submissions that have finished
+///         execution. Buffers used by submissions still in flight go in
+///         `self.active[index].mapped`, and the rest go into
+///         `self.ready_to_map`.
+///
+///     2)  `triage_submissions` moves the `mapped` buffers of each completed
+///         submission in `self.active` to `self.ready_to_map`.  At this point,
+///         both `self.active` and `self.ready_to_map` are up to date with the
+///         given submission index.
+///
+///     3)  `handle_mapping` drains `self.ready_to_map` and actually maps the
+///         buffers, collecting a list of notification closures to call. But any
+///         buffers that were dropped by the user get moved to
+///         `self.free_resources`.
+///
+///     4)  `cleanup` frees everything in `free_resources`.
+///
+/// Only `self.mapped` holds a `RefCount` for the buffer; it is dropped by
+/// `triage_mapped`.
 pub(super) struct LifetimeTracker<A: hal::Api> {
-    /// Resources that the user has requested be mapped, but are still in use.
+    /// Resources that the user has requested be mapped, but which are used by
+    /// queue submissions still in flight.
    mapped: Vec<Stored<id::BufferId>>,
+
    /// Buffers can be used in a submission that is yet to be made, by the
    /// means of `write_buffer()`, so we have a special place for them.
    pub future_suspected_buffers: Vec<Stored<id::BufferId>>,
+
    /// Textures can be used in the upcoming submission by `write_texture`.
    pub future_suspected_textures: Vec<Stored<id::TextureId>>,
+
    /// Resources that are suspected for destruction.
    pub suspected_resources: SuspectedResources,
-    /// Resources that are not referenced any more but still used by GPU.
-    /// Grouped by submissions associated with a fence and a submission index.
-    /// The active submissions have to be stored in FIFO order: oldest come first.
+
+    /// Resources used by queue submissions still in flight. One entry per
+    /// submission, with older submissions appearing before younger.
+    ///
+    /// Entries are added by `track_submission` and drained by
+    /// `LifetimeTracker::triage_submissions`. Lots of methods contribute data
+    /// to particular entries.
    active: Vec<ActiveSubmission<A>>,
-    /// Resources that are neither referenced or used, just life_tracker
-    /// actual deletion.
+
+    /// Raw backend resources that are neither referenced nor used.
+    ///
+    /// These are freed by `LifetimeTracker::cleanup`, which is called from
+    /// periodic maintenance functions like `Global::device_poll`, and when a
+    /// device is destroyed.
    free_resources: NonReferencedResources<A>,
+
+    /// Buffers the user has asked us to map, and which are not used by any
+    /// queue submission still in flight.
    ready_to_map: Vec<id::Valid<id::BufferId>>,
 }

@@ -246,6 +309,7 @@ impl<A: hal::Api> LifetimeTracker<A> {
        }
    }

+    /// Start tracking resources associated with a new queue submission.
    pub fn track_submission(
        &mut self,
        index: SubmissionIndex,
@@ -289,7 +353,13 @@ impl<A: hal::Api> LifetimeTracker<A> {
        self.mapped.push(Stored { value, ref_count });
    }

-    /// Returns the last submission index that is done.
+    /// Sort out the consequences of completed submissions.
+    ///
+    /// Assume that all submissions up through `last_done` have completed.
+    /// Buffers they used are now ready to map. Resources for which they were
+    /// the final use are now ready to free.
+    ///
+    /// Return a list of `SubmittedWorkDoneClosure`s to run.
    #[must_use]
    pub fn triage_submissions(
        &mut self,
@@ -647,6 +717,10 @@ impl<A: HalApi> LifetimeTracker<A> {
        }
    }

+    /// Determine which buffers are ready to map, and which must wait for the
+    /// GPU.
+    ///
+    /// See the documentation for [`LifetimeTracker`] for details.
    pub(super) fn triage_mapped<G: GlobalIdentityHandlerFactory>(
        &mut self,
        hub: &Hub<A, G>,
@@ -677,6 +751,11 @@ impl<A: HalApi> LifetimeTracker<A> {
        }
    }

+    /// Map the buffers in `self.ready_to_map`.
+    ///
+    /// Return a list of mapping notifications to send.
+    ///
+    /// See the documentation for [`LifetimeTracker`] for details.
    #[must_use]
    pub(super) fn handle_mapping<G: GlobalIdentityHandlerFactory>(
        &mut self,
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -273,6 +273,8 @@ pub struct Device<A: hal::Api> {
    pub(crate) trackers: Mutex<TrackerSet>,
    // Life tracker should be locked right after the device and before anything else.
    life_tracker: Mutex<life::LifetimeTracker<A>>,
+    /// Temporary storage for resource management functions. Cleared at the end
+    /// of every call (unless an error occurs).
    temp_suspected: life::SuspectedResources,
    pub(crate) alignments: hal::Alignments,
    pub(crate) limits: wgt::Limits,
@@ -420,6 +422,11 @@ impl<A: HalApi> Device<A> {
        profiling::scope!("maintain", "Device");
        let mut life_tracker = self.lock_life(token);

+        // Normally, `temp_suspected` exists only to save heap
+        // allocations: it's cleared at the start of the function
+        // call, and cleared by the end. But `Global::queue_submit` is
+        // fallible; if it exits early, it may leave some resources in
+        // `temp_suspected`.
        life_tracker
            .suspected_resources
            .extend(&self.temp_suspected);
@@ -4861,6 +4868,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
        Ok(())
    }

+    /// Check `device_id` for freeable resources and completed buffer mappings.
    pub fn device_poll<A: HalApi>(
        &self,
        device_id: id::DeviceId,
@@ -4881,6 +4889,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
        Ok(())
    }

+    /// Poll all devices belonging to the backend `A`.
+    ///
+    /// If `force_wait` is true, block until all buffer mappings are done.
    fn poll_devices<A: HalApi>(
        &self,
        force_wait: bool,
@@ -4898,6 +4909,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
        Ok(())
    }

+    /// Poll all devices on all backends.
+    ///
+    /// This is the implementation of `wgpu::Instance::poll_all`.
    pub fn poll_all_devices(&self, force_wait: bool) -> Result<(), WaitIdleError> {
        let mut closures = UserClosures::default();

--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -50,7 +50,11 @@ use atomic::{AtomicUsize, Ordering};

 use std::{borrow::Cow, os::raw::c_char, ptr, sync::atomic};

+/// The index of a queue submission.
+///
+/// These are the values stored in `Device::fence`.
 type SubmissionIndex = hal::FenceValue;
+
 type Index = u32;
 type Epoch = u32;

@@ -71,6 +75,15 @@ impl<'a> LabelHelpers<'a> for Label<'a> {
 }

 /// Reference count object that is 1:1 with each reference.
+///
+/// All the clones of a given `RefCount` point to the same
+/// heap-allocated atomic reference count. When the count drops to
+/// zero, only the count is freed. No other automatic cleanup takes
+/// place; this is just a reference count, not a smart pointer.
+///
+/// `RefCount` values are created only by [`LifeGuard::new`] and by
+/// `Clone`, so every `RefCount` is implicitly tied to some
+/// [`LifeGuard`].
 #[derive(Debug)]
 struct RefCount(ptr::NonNull<AtomicUsize>);

@@ -122,10 +135,48 @@ impl MultiRefCount {
    }
 }

+/// Information needed to decide when it's safe to free some wgpu-core
+/// resource.
+///
+/// Each type representing a `wgpu-core` resource, like [`Device`],
+/// [`Buffer`], etc., contains a `LifeGuard` which indicates whether
+/// it is safe to free.
+///
+/// A resource may need to be retained for any of several reasons:
+///
+/// - The user may hold a reference to it (via a `wgpu::Buffer`, say).
+///
+/// - Other resources may depend on it (a texture view's backing
+///   texture, for example).
+///
+/// - It may be used by commands sent to the GPU that have not yet
+///   finished execution.
+///
+/// [`Device`]: device::Device
+/// [`Buffer`]: resource::Buffer
 #[derive(Debug)]
 pub struct LifeGuard {
+    /// `RefCount` for the user's reference to this resource.
+    ///
+    /// When the user first creates a `wgpu-core` resource, this `RefCount` is
+    /// created along with the resource's `LifeGuard`. When the user drops the
+    /// resource, we swap this out for `None`. Note that the resource may
+    /// still be held alive by other resources.
+    ///
+    /// Any `Stored<T>` value holds a clone of this `RefCount` along with the id
+    /// of a `T` resource.
    ref_count: Option<RefCount>,
+
+    /// The index of the last queue submission in which the resource
+    /// was used.
+    ///
+    /// Each queue submission is fenced and assigned an index number
+    /// sequentially. Thus, when a queue submission completes, we know any
+    /// resources used in that submission and any lower-numbered submissions are
+    /// no longer in use by the GPU.
    submission_index: AtomicUsize,
+
+    /// The `label` from the descriptor used to create the resource.
    #[cfg(debug_assertions)]
    pub(crate) label: String,
 }
@@ -146,7 +197,10 @@ impl LifeGuard {
        self.ref_count.clone().unwrap()
    }

-    /// Returns `true` if the resource is still needed by the user.
+    /// Record that this resource will be used by the queue submission with the
+    /// given index.
+    ///
+    /// Returns `true` if the resource is still held by the user.
    fn use_at(&self, submit_index: SubmissionIndex) -> bool {
        self.submission_index
            .store(submit_index as _, Ordering::Release);
--- a/wgpu-core/src/track/mod.rs
+++ b/wgpu-core/src/track/mod.rs
@@ -230,7 +230,9 @@ impl<S: ResourceState> ResourceTracker<S> {
        }
    }

-    /// Removes the resource from the tracker if we are holding the last reference.
+    /// Remove the resource from the tracker if the tracker is holding the last reference.
+    ///
+    /// Return `true` if we did remove the resource.
    pub(crate) fn remove_abandoned(&mut self, id: Valid<S::Id>) -> bool {
        let (index, epoch, backend) = id.0.unzip();
        debug_assert_eq!(backend, self.backend);