diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 1669252e54..d6addd9766 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -303,7 +303,7 @@ pub struct PipelineLayout<B: hal::Backend> {
     pub(crate) raw: B::PipelineLayout,
     pub(crate) device_id: Stored<DeviceId>,
     pub(crate) life_guard: LifeGuard,
-    pub(crate) bind_group_layout_ids: ArrayVec<[Stored<BindGroupLayoutId>; MAX_BIND_GROUPS]>,
+    pub(crate) bind_group_layout_ids: ArrayVec<[BindGroupLayoutId; MAX_BIND_GROUPS]>,
     pub(crate) push_constant_ranges: ArrayVec<[wgt::PushConstantRange; SHADER_STAGE_COUNT]>,
 }
diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 32e0442d93..eb2aee548c 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -505,7 +505,7 @@ impl State {
         &mut self,
         index_format: wgt::IndexFormat,
         vertex_strides: &[(wgt::BufferAddress, wgt::InputStepMode)],
-        layout_ids: &[Stored<id::BindGroupLayoutId>],
+        layout_ids: &[id::BindGroupLayoutId],
         push_constant_layouts: &[wgt::PushConstantRange],
     ) {
         self.index.set_format(index_format);
@@ -528,8 +528,8 @@ impl State {
             self.bind
                 .iter()
                 .zip(layout_ids)
-                .position(|(bs, layout_id)| match bs.bind_group {
-                    Some((_, bgl_id)) => bgl_id != layout_id.value,
+                .position(|(bs, &layout_id)| match bs.bind_group {
+                    Some((_, bgl_id)) => bgl_id != layout_id,
                     None => false,
                 })
         };
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index b1def98856..9ae644dbf0 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -257,14 +257,14 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                         .reset_expectations(pipeline_layout.bind_group_layout_ids.len());
                     let mut is_compatible = true;
 
-                    for (index, (entry, bgl_id)) in state
+                    for (index, (entry, &bgl_id)) in state
                         .binder
                         .entries
                         .iter_mut()
                         .zip(&pipeline_layout.bind_group_layout_ids)
                         .enumerate()
                     {
-                        match entry.expect_layout(bgl_id.value) {
+                        match entry.expect_layout(bgl_id) {
                             LayoutChange::Match(bg_id, offsets) if is_compatible => {
                                 let desc_set = bind_group_guard[bg_id].raw.raw();
                                 unsafe {
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 0f33e04a96..f6a23fb18a 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1111,14 +1111,14 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                         .reset_expectations(pipeline_layout.bind_group_layout_ids.len());
                     let mut is_compatible = true;
 
-                    for (index, (entry, bgl_id)) in state
+                    for (index, (entry, &bgl_id)) in state
                         .binder
                         .entries
                         .iter_mut()
                         .zip(&pipeline_layout.bind_group_layout_ids)
                         .enumerate()
                     {
-                        match entry.expect_layout(bgl_id.value) {
+                        match entry.expect_layout(bgl_id) {
                             LayoutChange::Match(bg_id, offsets) if is_compatible => {
                                 let desc_set = bind_group_guard[bg_id].raw.raw();
                                 unsafe {
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 99dcb182a0..fecaaa4369 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -32,7 +32,7 @@ pub struct SuspectedResources {
     pub(crate) bind_groups: Vec<id::BindGroupId>,
     pub(crate) compute_pipelines: Vec<id::ComputePipelineId>,
     pub(crate) render_pipelines: Vec<id::RenderPipelineId>,
-    pub(crate) bind_group_layouts: Vec<Stored<id::BindGroupLayoutId>>,
+    pub(crate) bind_group_layouts: Vec<id::BindGroupLayoutId>,
     pub(crate) pipeline_layouts: Vec<Stored<id::PipelineLayoutId>>,
     pub(crate) render_bundles: Vec<id::RenderBundleId>,
 }
@@ -524,27 +524,9 @@ impl<B: GfxBackend> LifetimeTracker<B> {
             }
         }
 
-        if !self.suspected_resources.bind_group_layouts.is_empty() {
-            let (mut guard, _) = hub.bind_group_layouts.write(token);
-
-            for Stored {
-                value: id,
-                ref_count,
-            } in self.suspected_resources.bind_group_layouts.drain(..)
-            {
-                //Note: this has to happen after all the suspected pipelines are destroyed
-                if ref_count.load() == 1 {
-                    #[cfg(feature = "trace")]
-                    trace.map(|t| t.lock().add(trace::Action::DestroyBindGroupLayout(id)));
-                    hub.bind_group_layouts.free_id(id);
-                    let layout = guard.remove(id).unwrap();
-                    self.free_resources.descriptor_set_layouts.push(layout.raw);
-                }
-            }
-        }
-
         if !self.suspected_resources.pipeline_layouts.is_empty() {
-            let (mut guard, _) = hub.pipeline_layouts.write(token);
+            let (mut guard, mut token) = hub.pipeline_layouts.write(token);
+            let (bgl_guard, _) = hub.bind_group_layouts.read(&mut token);
 
             for Stored {
                 value: id,
@@ -557,10 +539,31 @@ impl<B: GfxBackend> LifetimeTracker<B> {
                     trace.map(|t| t.lock().add(trace::Action::DestroyPipelineLayout(id)));
                     hub.pipeline_layouts.free_id(id);
                     let layout = guard.remove(id).unwrap();
+                    for &bgl_id in layout.bind_group_layout_ids.iter() {
+                        bgl_guard[bgl_id].multi_ref_count.dec();
+                    }
                     self.free_resources.pipeline_layouts.push(layout.raw);
                 }
             }
         }
+
+        if !self.suspected_resources.bind_group_layouts.is_empty() {
+            self.suspected_resources.bind_group_layouts.sort();
+            self.suspected_resources.bind_group_layouts.dedup();
+            let (mut guard, _) = hub.bind_group_layouts.write(token);
+
+            for id in self.suspected_resources.bind_group_layouts.drain(..) {
+                //Note: this has to happen after all the suspected pipelines are destroyed
+                //Note: nothing else can bump the refcount since the guard is locked exclusively
+                if guard[id].multi_ref_count.is_empty() {
+                    #[cfg(feature = "trace")]
+                    trace.map(|t| t.lock().add(trace::Action::DestroyBindGroupLayout(id)));
+                    hub.bind_group_layouts.free_id(id);
+                    let layout = guard.remove(id).unwrap();
+                    self.free_resources.descriptor_set_layouts.push(layout.raw);
+                }
+            }
+        }
     }
 
     pub(crate) fn triage_mapped(
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 7292bff879..8c9ca6badf 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -1193,11 +1193,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let device = &device_guard[device_id];
 
         // If there is an equivalent BGL, just bump the refcount and return it.
+        // Warning: this isn't valid logic when `id_in` is provided.
         {
             let (bgl_guard, _) = hub.bind_group_layouts.read(&mut token);
             let bind_group_layout_id = bgl_guard
                 .iter(device_id.backend())
-                .find(|(_, bgl)| bgl.entries == entry_map);
+                .find(|(_, bgl)| bgl.device_id.value == device_id && bgl.entries == entry_map);
 
             if let Some((id, value)) = bind_group_layout_id {
                 value.multi_ref_count.inc();
@@ -1308,13 +1309,11 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let hub = B::hub(self);
         let mut token = Token::root();
 
-        let (device_id, ref_count) = {
+        let device_id = {
            let (bind_group_layout_guard, _) = hub.bind_group_layouts.read(&mut token);
            let layout = &bind_group_layout_guard[bind_group_layout_id];
-            match layout.multi_ref_count.dec() {
-                Some(last) => (layout.device_id.value, last),
-                None => return,
-            }
+            layout.multi_ref_count.dec();
+            layout.device_id.value
         };
 
         let (device_guard, mut token) = hub.devices.read(&mut token);
@@ -1322,10 +1321,7 @@
             .lock_life(&mut token)
             .suspected_resources
             .bind_group_layouts
-            .push(Stored {
-                value: bind_group_layout_id,
-                ref_count,
-            });
+            .push(bind_group_layout_id);
     }
 
     pub fn device_create_pipeline_layout(
@@ -1428,9 +1424,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             let (bind_group_layout_guard, _) = hub.bind_group_layouts.read(&mut token);
             desc.bind_group_layouts
                 .iter()
-                .map(|&id| Stored {
-                    value: id,
-                    ref_count: bind_group_layout_guard[id].multi_ref_count.add_ref(),
+                .map(|&id| {
+                    bind_group_layout_guard[id].multi_ref_count.inc();
+                    id
                 })
                 .collect()
         },
@@ -2217,7 +2213,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let group_layouts = layout
             .bind_group_layout_ids
             .iter()
-            .map(|id| &bgl_guard[id.value].entries)
+            .map(|&id| &bgl_guard[id].entries)
             .collect::<ArrayVec<_>>();
 
         let (shader_module_guard, _) = hub.shader_modules.read(&mut token);
@@ -2481,7 +2477,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let group_layouts = layout
             .bind_group_layout_ids
             .iter()
-            .map(|id| &bgl_guard[id.value].entries)
+            .map(|&id| &bgl_guard[id].entries)
            .collect::<ArrayVec<_>>();
 
         let interface = validation::StageInterface::default();
diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs
index b134508921..91b5a9cd1d 100644
--- a/wgpu-core/src/id.rs
+++ b/wgpu-core/src/id.rs
@@ -3,7 +3,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use crate::{Epoch, Index};
-use std::{fmt, marker::PhantomData, num::NonZeroU64};
+use std::{cmp::Ordering, fmt, marker::PhantomData, num::NonZeroU64};
 use wgt::Backend;
 
 const BACKEND_BITS: usize = 3;
@@ -98,6 +98,18 @@ impl<T> PartialEq for Id<T> {
 impl<T> Eq for Id<T> {}
 
+impl<T> PartialOrd for Id<T> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.0.partial_cmp(&other.0)
+    }
+}
+
+impl<T> Ord for Id<T> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.0.cmp(&other.0)
+    }
+}
+
 pub trait TypedId {
     fn zip(index: Index, epoch: Epoch, backend: Backend) -> Self;
     fn unzip(self) -> (Index, Epoch, Backend);
 }
diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs
index 1c18180674..6ec70f95c1 100644
--- a/wgpu-core/src/lib.rs
+++ b/wgpu-core/src/lib.rs
@@ -100,7 +100,7 @@ impl RefCount {
 impl Clone for RefCount {
     fn clone(&self) -> Self {
-        let old_size = unsafe { self.0.as_ref() }.fetch_add(1, Ordering::Release);
+        let old_size = unsafe { self.0.as_ref() }.fetch_add(1, Ordering::AcqRel);
         assert!(old_size < Self::MAX);
         RefCount(self.0)
     }
@@ -137,6 +137,7 @@ fn loom() {
 }
 
 /// Reference count object that tracks multiple references.
+/// Unlike `RefCount`, it's manually inc()/dec() called.
 #[derive(Debug)]
 struct MultiRefCount(ptr::NonNull<AtomicUsize>);
 
@@ -146,33 +147,26 @@ unsafe impl Send for MultiRefCount {}
 unsafe impl Sync for MultiRefCount {}
 
 impl MultiRefCount {
     fn new() -> Self {
         let bx = Box::new(AtomicUsize::new(1));
-        MultiRefCount(unsafe { ptr::NonNull::new_unchecked(Box::into_raw(bx)) })
+        let ptr = Box::into_raw(bx);
+        MultiRefCount(unsafe { ptr::NonNull::new_unchecked(ptr) })
     }
 
     fn inc(&self) {
-        unsafe { self.0.as_ref() }.fetch_add(1, Ordering::Release);
+        unsafe { self.0.as_ref() }.fetch_add(1, Ordering::AcqRel);
     }
 
-    fn add_ref(&self) -> RefCount {
-        self.inc();
-        RefCount(self.0)
+    fn dec(&self) {
+        unsafe { self.0.as_ref() }.fetch_sub(1, Ordering::AcqRel);
     }
 
-    fn dec(&self) -> Option<RefCount> {
-        match unsafe { self.0.as_ref() }.fetch_sub(1, Ordering::AcqRel) {
-            0 => unreachable!(),
-            1 => Some(self.add_ref()),
-            _ => None,
-        }
+    fn is_empty(&self) -> bool {
+        unsafe { self.0.as_ref() }.load(Ordering::Acquire) == 0
     }
 }
 
 impl Drop for MultiRefCount {
     fn drop(&mut self) {
-        // We don't do anything here. We rely on the fact that
-        // `dec` was called before `MultiRefCount` got dropped,
-        // which spawned `RefCount`, which upon deletion would
-        // destroy the Box.
+        let _ = unsafe { Box::from_raw(self.0.as_ptr()) };
    }
 }