feat: snapshots (#5738)

Co-authored-by: Matthias Seitz <matthias.seitz@outlook.de>
Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
Co-authored-by: Bjerg <onbjerg@users.noreply.github.com>
This commit is contained in:
joshieDo
2023-12-18 15:55:43 +00:00
committed by GitHub
parent 900fe7ea4e
commit 18dd1b72a1
28 changed files with 1749 additions and 453 deletions

1
Cargo.lock generated
View File

@@ -6292,6 +6292,7 @@ dependencies = [
"reth-primitives",
"reth-trie",
"revm",
"strum",
"tempfile",
"tokio",
"tokio-stream",

View File

@@ -50,7 +50,7 @@ impl Command {
let path: PathBuf = SnapshotSegment::Headers
.filename_with_configuration(filters, compression, &block_range, &tx_range)
.into();
let provider = SnapshotProvider::default();
let provider = SnapshotProvider::new(PathBuf::default())?;
let jar_provider = provider.get_segment_provider_from_block(
SnapshotSegment::Headers,
self.from,

View File

@@ -4,7 +4,7 @@ use itertools::Itertools;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use reth_db::{database::Database, open_db_read_only, DatabaseEnv};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::NippyJar;
use reth_nippy_jar::{NippyJar, NippyJarCursor};
use reth_primitives::{
snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentHeader},
BlockNumber, ChainSpec, SnapshotSegment,
@@ -210,19 +210,18 @@ impl Command {
fn stats(&self, snapshots: Vec<impl AsRef<Path>>) -> eyre::Result<()> {
let mut total_filters_size = 0;
let mut total_index_size = 0;
let mut total_offsets_size = 0;
let mut total_duration = Duration::new(0, 0);
let mut total_file_size = 0;
for snap in &snapshots {
let start_time = Instant::now();
let jar = NippyJar::<SegmentHeader>::load(snap.as_ref())?;
let _cursor = NippyJarCursor::new(&jar)?;
let duration = start_time.elapsed();
let file_size = snap.as_ref().metadata()?.len();
total_filters_size += jar.filter_size();
total_index_size += jar.offsets_index_size();
total_offsets_size += jar.offsets_size();
total_duration += duration;
total_file_size += file_size;
@@ -230,7 +229,6 @@ impl Command {
println!(" File Size: {:>7}", human_bytes(file_size as f64));
println!(" Filters Size: {:>7}", human_bytes(jar.filter_size() as f64));
println!(" Offset Index Size: {:>7}", human_bytes(jar.offsets_index_size() as f64));
println!(" Offset List Size: {:>7}", human_bytes(jar.offsets_size() as f64));
println!(
" Loading Time: {:>7.2} ms | {:>7.2} µs",
duration.as_millis() as f64,
@@ -242,7 +240,6 @@ impl Command {
println!("Total Filters Size: {:>7}", human_bytes(total_filters_size as f64));
println!("Total Offset Index Size: {:>7}", human_bytes(total_index_size as f64));
println!("Total Offset List Size: {:>7}", human_bytes(total_offsets_size as f64));
println!("Total File Size: {:>7}", human_bytes(total_file_size as f64));
println!(
"Average Loading Time: {:>7.2} ms | {:>7.2} µs",

View File

@@ -53,7 +53,7 @@ impl Command {
.filename_with_configuration(filters, compression, &block_range, &tx_range)
.into();
let provider = SnapshotProvider::default();
let provider = SnapshotProvider::new(PathBuf::default())?;
let jar_provider = provider.get_segment_provider_from_block(
SnapshotSegment::Receipts,
self.from,

View File

@@ -50,7 +50,7 @@ impl Command {
let path: PathBuf = SnapshotSegment::Transactions
.filename_with_configuration(filters, compression, &block_range, &tx_range)
.into();
let provider = SnapshotProvider::default();
let provider = SnapshotProvider::new(PathBuf::default())?;
let jar_provider = provider.get_segment_provider_from_block(
SnapshotSegment::Transactions,
self.from,

View File

@@ -266,7 +266,7 @@ impl<Ext: RethCliExt> NodeCommand<Ext> {
)?;
provider_factory = provider_factory
.with_snapshots(data_dir.snapshots_path(), snapshotter.highest_snapshot_receiver());
.with_snapshots(data_dir.snapshots_path(), snapshotter.highest_snapshot_receiver())?;
self.start_metrics_endpoint(prometheus_handle, Arc::clone(&db)).await?;

View File

@@ -14,6 +14,9 @@ pub enum ProviderError {
/// Database error.
#[error(transparent)]
Database(#[from] crate::db::DatabaseError),
/// Filesystem path error.
#[error("{0}")]
FsPathError(String),
/// Nippy jar error.
#[error("nippy jar error: {0}")]
NippyJar(String),
@@ -127,6 +130,12 @@ impl From<reth_nippy_jar::NippyJarError> for ProviderError {
}
}
impl From<reth_primitives::fs::FsPathError> for ProviderError {
fn from(err: reth_primitives::fs::FsPathError) -> Self {
ProviderError::FsPathError(err.to_string())
}
}
/// A root mismatch error at a given block height.
#[derive(Clone, Debug, Error, PartialEq, Eq)]
#[error("root mismatch at #{block_number} ({block_hash}): {root}")]

View File

@@ -4,14 +4,11 @@ mod compression;
mod filters;
mod segment;
use alloy_primitives::{BlockNumber, TxNumber};
use alloy_primitives::BlockNumber;
pub use compression::Compression;
pub use filters::{Filters, InclusionFilter, PerfectHashingFunction};
pub use segment::{SegmentConfig, SegmentHeader, SnapshotSegment};
use crate::fs::FsPathError;
use std::{ops::RangeInclusive, path::Path};
/// Default snapshot block count.
pub const BLOCKS_PER_SNAPSHOT: u64 = 500_000;
@@ -48,20 +45,3 @@ impl HighestSnapshots {
}
}
}
/// Given the snapshot's location, it returns an iterator over the existing snapshots in the format
/// of a tuple composed by the segment, block range and transaction range.
pub fn iter_snapshots(
path: impl AsRef<Path>,
) -> Result<
impl Iterator<Item = (SnapshotSegment, RangeInclusive<BlockNumber>, RangeInclusive<TxNumber>)>,
FsPathError,
> {
// A `read_dir` failure on the directory itself aborts with `FsPathError`;
// errors on individual entries are silently skipped via `filter_map(Result::ok)`.
let entries = crate::fs::read_dir(path.as_ref())?.filter_map(Result::ok);
Ok(entries.filter_map(|entry| {
// Only regular files are considered; `parse_filename` returning `None`
// (a filename that is not a snapshot) drops the entry from the iterator.
if entry.metadata().map_or(false, |metadata| metadata.is_file()) {
return SnapshotSegment::parse_filename(&entry.file_name())
}
None
}))
}

View File

@@ -5,7 +5,7 @@ use crate::{
use derive_more::Display;
use serde::{Deserialize, Serialize};
use std::{ffi::OsStr, ops::RangeInclusive, str::FromStr};
use strum::{AsRefStr, EnumString};
use strum::{AsRefStr, EnumIter, EnumString};
#[derive(
Debug,
@@ -19,6 +19,7 @@ use strum::{AsRefStr, EnumString};
Deserialize,
Serialize,
EnumString,
EnumIter,
AsRefStr,
Display,
)]
@@ -153,6 +154,16 @@ impl SegmentHeader {
Self { block_range, tx_range, segment }
}
/// Returns the transaction range.
pub fn tx_range(&self) -> &RangeInclusive<TxNumber> {
&self.tx_range
}
/// Returns the block range.
pub fn block_range(&self) -> &RangeInclusive<BlockNumber> {
&self.block_range
}
/// Returns the first block number of the segment.
pub fn block_start(&self) -> BlockNumber {
*self.block_range.start()

View File

@@ -1,12 +1,9 @@
//! Support for snapshotting.
use crate::{segments, segments::Segment, SnapshotterError};
use reth_db::database::Database;
use reth_db::{database::Database, snapshot::iter_snapshots};
use reth_interfaces::{RethError, RethResult};
use reth_primitives::{
snapshot::{iter_snapshots, HighestSnapshots},
BlockNumber, TxNumber,
};
use reth_primitives::{snapshot::HighestSnapshots, BlockNumber, TxNumber};
use reth_provider::{BlockReader, DatabaseProviderRO, ProviderFactory, TransactionsProviderExt};
use std::{
collections::HashMap,
@@ -155,10 +152,14 @@ impl<DB: Database> Snapshotter<DB> {
// It walks over the directory and parses the snapshot filenames extracting
// `SnapshotSegment` and their inclusive range. It then takes the maximum block
// number for each specific segment.
for (segment, block_range, _) in iter_snapshots(&self.snapshots_path)? {
let max_segment_block = self.highest_snapshots.as_mut(segment);
if max_segment_block.map_or(true, |block| block < *block_range.end()) {
*max_segment_block = Some(*block_range.end());
for (segment, ranges) in
iter_snapshots(&self.snapshots_path).map_err(|err| RethError::Provider(err.into()))?
{
for (block_range, _) in ranges {
let max_segment_block = self.highest_snapshots.as_mut(segment);
if max_segment_block.map_or(true, |block| block < *block_range.end()) {
*max_segment_block = Some(*block_range.end());
}
}
}

View File

@@ -2,8 +2,9 @@ use super::mask::{ColumnSelectorOne, ColumnSelectorThree, ColumnSelectorTwo};
use crate::table::Decompress;
use derive_more::{Deref, DerefMut};
use reth_interfaces::provider::ProviderResult;
use reth_nippy_jar::{MmapHandle, NippyJar, NippyJarCursor};
use reth_nippy_jar::{DataReader, NippyJar, NippyJarCursor};
use reth_primitives::{snapshot::SegmentHeader, B256};
use std::sync::Arc;
/// Cursor of a snapshot segment.
#[derive(Debug, Deref, DerefMut)]
@@ -11,8 +12,8 @@ pub struct SnapshotCursor<'a>(NippyJarCursor<'a, SegmentHeader>);
impl<'a> SnapshotCursor<'a> {
/// Returns a new [`SnapshotCursor`].
pub fn new(jar: &'a NippyJar<SegmentHeader>, mmap_handle: MmapHandle) -> ProviderResult<Self> {
Ok(Self(NippyJarCursor::with_handle(jar, mmap_handle)?))
pub fn new(jar: &'a NippyJar<SegmentHeader>, reader: Arc<DataReader>) -> ProviderResult<Self> {
Ok(Self(NippyJarCursor::with_reader(jar, reader)?))
}
/// Returns the current `BlockNumber` or `TxNumber` of the cursor depending on the kind of

View File

@@ -6,9 +6,8 @@ use crate::{
};
use reth_interfaces::provider::ProviderResult;
use reth_nippy_jar::{ColumnResult, NippyJar, PHFKey};
use reth_nippy_jar::{ColumnResult, NippyJar, NippyJarHeader, PHFKey};
use reth_tracing::tracing::*;
use serde::{Deserialize, Serialize};
use std::{error::Error as StdError, ops::RangeInclusive};
/// Macro that generates snapshot creation functions that take an arbitrary number of [`Table`] and
@@ -35,7 +34,7 @@ macro_rules! generate_snapshot_func {
pub fn [<create_snapshot$(_ $tbl)+>]<
$($tbl: Table<Key=K>,)+
K,
H: for<'a> Deserialize<'a> + Send + Serialize + Sync + std::fmt::Debug
H: NippyJarHeader
>
(
tx: &impl DbTx,

View File

@@ -1,6 +1,12 @@
//! reth's snapshot database table import and access
mod generation;
use std::{
collections::{hash_map::Entry, HashMap},
ops::RangeInclusive,
path::Path,
};
pub use generation::*;
mod cursor;
@@ -8,5 +14,63 @@ pub use cursor::SnapshotCursor;
mod mask;
pub use mask::*;
use reth_nippy_jar::{NippyJar, NippyJarError};
use reth_primitives::{snapshot::SegmentHeader, BlockNumber, SnapshotSegment, TxNumber};
mod masks;
/// Alias type for a map of [`SnapshotSegment`] and sorted lists of existing snapshot ranges.
type SortedSnapshots =
HashMap<SnapshotSegment, Vec<(RangeInclusive<BlockNumber>, RangeInclusive<TxNumber>)>>;
/// Given the snapshots directory path, it returns a list over the existing snapshots organized by
/// [`SnapshotSegment`]. Each segment has a sorted list of block ranges and transaction ranges.
pub fn iter_snapshots(path: impl AsRef<Path>) -> Result<SortedSnapshots, NippyJarError> {
let mut static_files = SortedSnapshots::default();
// Collect entries eagerly; a failure to read the directory itself is surfaced as
// `NippyJarError::Custom`, while errors on individual entries are silently skipped.
let entries = reth_primitives::fs::read_dir(path.as_ref())
.map_err(|err| NippyJarError::Custom(err.to_string()))?
.filter_map(Result::ok)
.collect::<Vec<_>>();
for entry in entries {
// Only regular files are considered; directories and unreadable entries are ignored.
if entry.metadata().map_or(false, |metadata| metadata.is_file()) {
// Filenames that do not parse as a snapshot filename are skipped.
if let Some((segment, block_range, tx_range)) =
SnapshotSegment::parse_filename(&entry.file_name())
{
let ranges = (block_range, tx_range);
// Group each (block range, tx range) pair under its segment.
match static_files.entry(segment) {
Entry::Occupied(mut entry) => {
entry.get_mut().push(ranges);
}
Entry::Vacant(entry) => {
entry.insert(vec![ranges]);
}
}
}
}
}
for (segment, range_list) in static_files.iter_mut() {
// Sort by block end range.
range_list.sort_by(|a, b| a.0.end().cmp(b.0.end()));
if let Some((block_range, tx_range)) = range_list.pop() {
// The highest height static file filename might not be indicative of its actual
// block_range, so we need to read its actual configuration.
let jar = NippyJar::<SegmentHeader>::load(
&path.as_ref().join(segment.filename(&block_range, &tx_range)),
)?;
if &tx_range != jar.user_header().tx_range() {
// TODO(joshie): rename
}
// Re-insert the highest entry using the ranges read from the jar's own
// header rather than the (possibly stale) filename-derived ranges.
range_list.push((
jar.user_header().block_range().clone(),
jar.user_header().tx_range().clone(),
));
}
}
Ok(static_files)
}

View File

@@ -1,9 +1,9 @@
use crate::{
compression::{Compression, Compressors, Zstd},
InclusionFilter, MmapHandle, NippyJar, NippyJarError, PerfectHashingFunction, RefRow,
DataReader, InclusionFilter, NippyJar, NippyJarError, NippyJarHeader, PerfectHashingFunction,
RefRow,
};
use serde::{de::Deserialize, ser::Serialize};
use std::ops::Range;
use std::{ops::Range, sync::Arc};
use sucds::int_vectors::Access;
use zstd::bulk::Decompressor;
@@ -12,46 +12,40 @@ use zstd::bulk::Decompressor;
pub struct NippyJarCursor<'a, H = ()> {
/// [`NippyJar`] which holds most of the required configuration to read from the file.
jar: &'a NippyJar<H>,
/// Data file.
mmap_handle: MmapHandle,
/// Data and offset reader.
reader: Arc<DataReader>,
/// Internal buffer to unload data to without reallocating memory on each retrieval.
internal_buffer: Vec<u8>,
/// Cursor row position.
row: u64,
}
impl<'a, H: std::fmt::Debug> std::fmt::Debug for NippyJarCursor<'a, H>
where
H: Send + Sync + Serialize + for<'b> Deserialize<'b> + core::fmt::Debug,
{
impl<'a, H: NippyJarHeader> std::fmt::Debug for NippyJarCursor<'a, H> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("NippyJarCursor").field("config", &self.jar).finish_non_exhaustive()
}
}
impl<'a, H> NippyJarCursor<'a, H>
where
H: Send + Sync + Serialize + for<'b> Deserialize<'b> + std::fmt::Debug + 'static,
{
impl<'a, H: NippyJarHeader> NippyJarCursor<'a, H> {
pub fn new(jar: &'a NippyJar<H>) -> Result<Self, NippyJarError> {
let max_row_size = jar.max_row_size;
Ok(NippyJarCursor {
jar,
mmap_handle: jar.open_data()?,
reader: Arc::new(jar.open_data_reader()?),
// Makes sure that we have enough buffer capacity to decompress any row of data.
internal_buffer: Vec::with_capacity(max_row_size),
row: 0,
})
}
pub fn with_handle(
pub fn with_reader(
jar: &'a NippyJar<H>,
mmap_handle: MmapHandle,
reader: Arc<DataReader>,
) -> Result<Self, NippyJarError> {
let max_row_size = jar.max_row_size;
Ok(NippyJarCursor {
jar,
mmap_handle,
reader,
// Makes sure that we have enough buffer capacity to decompress any row of data.
internal_buffer: Vec::with_capacity(max_row_size),
row: 0,
@@ -112,7 +106,7 @@ where
pub fn next_row(&mut self) -> Result<Option<RefRow<'_>>, NippyJarError> {
self.internal_buffer.clear();
if self.row as usize * self.jar.columns >= self.jar.offsets.len() {
if self.row as usize >= self.jar.rows {
// Has reached the end
return Ok(None)
}
@@ -129,7 +123,7 @@ where
Ok(Some(
row.into_iter()
.map(|v| match v {
ValueRange::Mmap(range) => &self.mmap_handle[range],
ValueRange::Mmap(range) => self.reader.data(range),
ValueRange::Internal(range) => &self.internal_buffer[range],
})
.collect(),
@@ -186,7 +180,7 @@ where
pub fn next_row_with_cols(&mut self, mask: usize) -> Result<Option<RefRow<'_>>, NippyJarError> {
self.internal_buffer.clear();
if self.row as usize * self.jar.columns >= self.jar.offsets.len() {
if self.row as usize >= self.jar.rows {
// Has reached the end
return Ok(None)
}
@@ -204,7 +198,7 @@ where
Ok(Some(
row.into_iter()
.map(|v| match v {
ValueRange::Mmap(range) => &self.mmap_handle[range],
ValueRange::Mmap(range) => self.reader.data(range),
ValueRange::Internal(range) => &self.internal_buffer[range],
})
.collect(),
@@ -219,13 +213,13 @@ where
) -> Result<(), NippyJarError> {
// Find out the offset of the column value
let offset_pos = self.row as usize * self.jar.columns + column;
let value_offset = self.jar.offsets.select(offset_pos).expect("should exist");
let value_offset = self.reader.offset(offset_pos) as usize;
let column_offset_range = if self.jar.offsets.len() == (offset_pos + 1) {
let column_offset_range = if self.jar.rows * self.jar.columns == offset_pos + 1 {
// It's the last column of the last row
value_offset..self.mmap_handle.len()
value_offset..self.reader.size()
} else {
let next_value_offset = self.jar.offsets.select(offset_pos + 1).expect("should exist");
let next_value_offset = self.reader.offset(offset_pos + 1) as usize;
value_offset..next_value_offset
};
@@ -242,7 +236,7 @@ where
.expect("dictionary to be loaded");
let mut decompressor = Decompressor::with_prepared_dictionary(dictionaries)?;
Zstd::decompress_with_dictionary(
&self.mmap_handle[column_offset_range],
self.reader.data(column_offset_range),
&mut self.internal_buffer,
&mut decompressor,
)?;
@@ -250,7 +244,7 @@ where
_ => {
// Uses the chosen default decompressor
compression.decompress_to(
&self.mmap_handle[column_offset_range],
self.reader.data(column_offset_range),
&mut self.internal_buffer,
)?;
}

View File

@@ -37,8 +37,12 @@ pub enum NippyJarError {
UnsupportedFilterQuery,
#[error("compression or decompression requires a bigger destination output")]
OutputTooSmall,
#[error("Dictionary is not loaded.")]
#[error("dictionary is not loaded.")]
DictionaryNotLoaded,
#[error("It's not possible to generate a compressor after loading a dictionary.")]
#[error("it's not possible to generate a compressor after loading a dictionary.")]
CompressorNotAllowed,
#[error("number of offsets ({0}) is smaller than prune request ({1}).")]
InvalidPruning(u64, u64),
#[error("jar has been frozen and cannot be modified.")]
FrozenJar,
}

View File

@@ -13,20 +13,13 @@
use memmap2::Mmap;
use serde::{Deserialize, Serialize};
use std::{
clone::Clone,
error::Error as StdError,
fs::File,
io::{Seek, Write},
marker::Sync,
ops::Deref,
ops::Range,
path::{Path, PathBuf},
sync::Arc,
};
use sucds::{
int_vectors::PrefixSummedEliasFano,
mii_sequences::{EliasFano, EliasFanoBuilder},
Serializable,
};
use sucds::{int_vectors::PrefixSummedEliasFano, Serializable};
use tracing::*;
pub mod filter;
@@ -45,15 +38,34 @@ pub use error::NippyJarError;
mod cursor;
pub use cursor::NippyJarCursor;
mod writer;
pub use writer::NippyJarWriter;
const NIPPY_JAR_VERSION: usize = 1;
const INDEX_FILE_EXTENSION: &str = "idx";
const OFFSETS_FILE_EXTENSION: &str = "off";
const CONFIG_FILE_EXTENSION: &str = "conf";
/// A [`RefRow`] is a list of column value slices pointing to either an internal buffer or a
/// memory-mapped file.
type RefRow<'a> = Vec<&'a [u8]>;
/// Alias type for a column value wrapped in `Result`
/// Alias type for a column value wrapped in `Result`.
pub type ColumnResult<T> = Result<T, Box<dyn StdError + Send + Sync>>;
/// A trait for the user-defined header of [NippyJar].
pub trait NippyJarHeader:
Send + Sync + Serialize + for<'b> Deserialize<'b> + std::fmt::Debug + 'static
{
}
// Blanket implementation for all types that implement the required traits.
impl<T> NippyJarHeader for T where
T: Send + Sync + Serialize + for<'b> Deserialize<'b> + std::fmt::Debug + 'static
{
}
/// `NippyJar` is a specialized storage format designed for immutable data.
///
/// Data is organized into a columnar format, enabling column-based compression. Data retrieval
@@ -88,40 +100,39 @@ pub struct NippyJar<H = ()> {
user_header: H,
/// Number of data columns in the jar.
columns: usize,
/// Number of data rows in the jar.
rows: usize,
/// Optional compression algorithm applied to the data.
compressor: Option<Compressors>,
#[serde(skip)]
/// Optional filter function for data membership checks.
filter: Option<InclusionFilters>,
#[serde(skip)]
/// Optional Perfect Hashing Function (PHF) for unique offset mapping.
phf: Option<Functions>,
/// Index mapping PHF output to value offsets in `offsets`.
#[serde(skip)]
offsets_index: PrefixSummedEliasFano,
/// Offsets within the file for each column value, arranged by row and column.
#[serde(skip)]
offsets: EliasFano,
/// Maximum uncompressed row size of the set. This will enable decompression without any
/// resizing of the output buffer.
#[serde(skip)]
max_row_size: usize,
/// Data path for file. Index file will be `{path}.idx`
/// Data path for file. Supporting files will have a format `{path}.{extension}`.
#[serde(skip)]
path: Option<PathBuf>,
path: PathBuf,
}
impl<H: std::fmt::Debug> std::fmt::Debug for NippyJar<H> {
impl<H: NippyJarHeader> std::fmt::Debug for NippyJar<H> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("NippyJar")
.field("version", &self.version)
.field("user_header", &self.user_header)
.field("rows", &self.rows)
.field("columns", &self.columns)
.field("compressor", &self.compressor)
.field("filter", &self.filter)
.field("phf", &self.phf)
.field("offsets_index (len)", &self.offsets_index.len())
.field("offsets_index (size in bytes)", &self.offsets_index.size_in_bytes())
.field("offsets (len)", &self.offsets.len())
.field("offsets (size in bytes)", &self.offsets.size_in_bytes())
.field("path", &self.path)
.field("max_row_size", &self.max_row_size)
.finish_non_exhaustive()
@@ -145,23 +156,20 @@ impl NippyJar<()> {
}
}
impl<H> NippyJar<H>
where
H: Send + Sync + Serialize + for<'a> Deserialize<'a> + std::fmt::Debug,
{
impl<H: NippyJarHeader> NippyJar<H> {
/// Creates a new [`NippyJar`] with a user-defined header data.
pub fn new(columns: usize, path: &Path, user_header: H) -> Self {
NippyJar {
version: NIPPY_JAR_VERSION,
user_header,
columns,
rows: 0,
max_row_size: 0,
compressor: None,
filter: None,
phf: None,
offsets: EliasFano::default(),
offsets_index: PrefixSummedEliasFano::default(),
path: Some(path.to_path_buf()),
path: path.to_path_buf(),
}
}
@@ -201,17 +209,12 @@ where
&self.user_header
}
/// Gets a reference to `self.offsets`.
pub fn offsets_size(&self) -> usize {
self.offsets.size_in_bytes()
}
/// Gets a reference to `self.offsets`.
/// Returns the size of inclusion filter
pub fn filter_size(&self) -> usize {
self.size()
}
/// Gets a reference to `self.offsets_index`.
/// Returns the size of offsets index
pub fn offsets_index_size(&self) -> usize {
self.offsets_index.size_in_bytes()
}
@@ -226,48 +229,53 @@ where
self.compressor.as_mut()
}
/// Loads the file configuration and returns [`Self`].
/// Loads the file configuration and returns [`Self`] without deserializing filters related
/// structures or the offset list.
///
/// **The user must ensure the header type matches the one used during the jar's creation.**
pub fn load(path: &Path) -> Result<Self, NippyJarError> {
// Read [`Self`] located at the data file.
let data_file = File::open(path)?;
// SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle.
let data_reader = unsafe { memmap2::Mmap::map(&data_file)? };
let mut obj: Self = bincode::deserialize_from(data_reader.as_ref())?;
obj.path = Some(path.to_path_buf());
// Read the offsets lists located at the index file.
let offsets_file = File::open(obj.index_path())?;
// SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle.
let mmap = unsafe { memmap2::Mmap::map(&offsets_file)? };
let mut offsets_reader = mmap.as_ref();
obj.offsets = EliasFano::deserialize_from(&mut offsets_reader)?;
obj.offsets_index = PrefixSummedEliasFano::deserialize_from(&mut offsets_reader)?;
obj.max_row_size = bincode::deserialize_from(offsets_reader)?;
let config_file = File::open(path.with_extension(CONFIG_FILE_EXTENSION))?;
let mut obj: Self = bincode::deserialize_from(&config_file)?;
obj.path = path.to_path_buf();
Ok(obj)
}
/// Returns the path from the data file
pub fn data_path(&self) -> PathBuf {
self.path.clone().expect("exists")
/// Loads filters into memory
pub fn load_filters(mut self) -> Result<Self, NippyJarError> {
// Read the offsets lists located at the index file.
let mut offsets_file = File::open(self.index_path())?;
self.offsets_index = PrefixSummedEliasFano::deserialize_from(&mut offsets_file)?;
self.phf = bincode::deserialize_from(&mut offsets_file)?;
self.filter = bincode::deserialize_from(&mut offsets_file)?;
Ok(self)
}
/// Returns the path from the index file
/// Returns the path for the data file
pub fn data_path(&self) -> &Path {
self.path.as_ref()
}
/// Returns the path for the index file
pub fn index_path(&self) -> PathBuf {
let data_path = self.data_path();
data_path
.parent()
.expect("exists")
.join(format!("{}.idx", data_path.file_name().expect("exists").to_string_lossy()))
self.path.with_extension(INDEX_FILE_EXTENSION)
}
/// Returns a [`MmapHandle`] of the data file
pub fn open_data(&self) -> Result<MmapHandle, NippyJarError> {
MmapHandle::new(self.data_path())
/// Returns the path for the offsets file
pub fn offsets_path(&self) -> PathBuf {
self.path.with_extension(OFFSETS_FILE_EXTENSION)
}
/// Returns the path for the config file
pub fn config_path(&self) -> PathBuf {
self.path.with_extension(CONFIG_FILE_EXTENSION)
}
/// Returns a [`DataReader`] of the data and offset file
pub fn open_data_reader(&self) -> Result<DataReader, NippyJarError> {
DataReader::new(self.data_path())
}
/// If required, prepares any compression algorithm to an early pass of the data.
@@ -338,124 +346,44 @@ where
columns: Vec<impl IntoIterator<Item = ColumnResult<Vec<u8>>>>,
total_rows: u64,
) -> Result<(), NippyJarError> {
let mut file = self.freeze_check(&columns)?;
self.freeze_config(&mut file)?;
self.check_before_freeze(&columns)?;
// Special case for zstd that might use custom dictionaries/compressors per column
// If any other compression algorithm is added and uses a similar flow, then revisit
// implementation
let mut maybe_zstd_compressors = None;
if let Some(Compressors::Zstd(zstd)) = &self.compressor {
maybe_zstd_compressors = zstd.compressors()?;
}
debug!(target: "nippy-jar", path=?self.data_path(), "Opening data file.");
// Temporary buffer to avoid multiple reallocations if compressing to a buffer (eg. zstd w/
// dict)
let mut tmp_buf = Vec::with_capacity(1_000_000);
// Creates the writer, data and offsets file
let mut writer = NippyJarWriter::new(self)?;
// Write all rows while taking all row start offsets
let mut row_number = 0u64;
let mut offsets = Vec::with_capacity(total_rows as usize * self.columns);
let mut column_iterators =
columns.into_iter().map(|v| v.into_iter()).collect::<Vec<_>>().into_iter();
// Append rows to file while holding offsets in memory
writer.append_rows(columns, total_rows)?;
debug!(target: "nippy-jar", compressor=?self.compressor, "Writing rows.");
// Flushes configuration and offsets to disk
writer.commit()?;
loop {
let mut iterators = Vec::with_capacity(self.columns);
// Write phf, filter and offset index to file
self.freeze_filters()?;
// Write the column value of each row
// TODO: iter_mut if we remove the IntoIterator interface.
let mut uncompressed_row_size = 0;
for (column_number, mut column_iter) in column_iterators.enumerate() {
offsets.push(file.stream_position()? as usize);
match column_iter.next() {
Some(Ok(value)) => {
uncompressed_row_size += value.len();
if let Some(compression) = &self.compressor {
// Special zstd case with dictionaries
if let (Some(dict_compressors), Compressors::Zstd(_)) =
(maybe_zstd_compressors.as_mut(), compression)
{
compression::Zstd::compress_with_dictionary(
&value,
&mut tmp_buf,
&mut file,
Some(dict_compressors.get_mut(column_number).expect("exists")),
)?;
} else {
let before = tmp_buf.len();
let len = compression.compress_to(&value, &mut tmp_buf)?;
file.write_all(&tmp_buf[before..before + len])?;
}
} else {
file.write_all(&value)?;
}
}
None => {
return Err(NippyJarError::UnexpectedMissingValue(
row_number,
column_number as u64,
))
}
Some(Err(err)) => return Err(err.into()),
}
iterators.push(column_iter);
}
tmp_buf.clear();
row_number += 1;
self.max_row_size = self.max_row_size.max(uncompressed_row_size);
if row_number == total_rows {
break
}
column_iterators = iterators.into_iter();
}
// drops immutable borrow
drop(maybe_zstd_compressors);
// Write offsets and offset index to file
self.freeze_offsets(offsets)?;
debug!(target: "nippy-jar", jar=?self, "Finished.");
debug!(target: "nippy-jar", jar=?self, "Finished writing data.");
Ok(())
}
/// Freezes offsets and its own index.
fn freeze_offsets(&mut self, offsets: Vec<usize>) -> Result<(), NippyJarError> {
if !offsets.is_empty() {
debug!(target: "nippy-jar", "Encoding offsets list.");
let mut builder =
EliasFanoBuilder::new(*offsets.last().expect("qed") + 1, offsets.len())?;
for offset in offsets {
builder.push(offset)?;
}
self.offsets = builder.build().enable_rank();
}
/// Freezes [`PerfectHashingFunction`], [`InclusionFilter`] and the offset index to file.
fn freeze_filters(&mut self) -> Result<(), NippyJarError> {
debug!(target: "nippy-jar", path=?self.index_path(), "Writing offsets and offsets index to file.");
let mut file = File::create(self.index_path())?;
self.offsets.serialize_into(&mut file)?;
self.offsets_index.serialize_into(&mut file)?;
self.max_row_size.serialize_into(file)?;
bincode::serialize_into(&mut file, &self.phf)?;
bincode::serialize_into(&mut file, &self.filter)?;
Ok(())
}
/// Safety checks before creating and returning a [`File`] handle to write data to.
fn freeze_check(
fn check_before_freeze(
&mut self,
columns: &[impl IntoIterator<Item = ColumnResult<Vec<u8>>>],
) -> Result<File, NippyJarError> {
) -> Result<(), NippyJarError> {
if columns.len() != self.columns {
return Err(NippyJarError::ColumnLenMismatch(self.columns, columns.len()))
}
@@ -471,23 +399,16 @@ where
let _ = phf.get_index(&[])?;
}
debug!(target: "nippy-jar", path=?self.data_path(), "Opening data file.");
Ok(File::create(self.data_path())?)
Ok(())
}
/// Writes all necessary configuration to file.
fn freeze_config(&mut self, handle: &mut File) -> Result<(), NippyJarError> {
// TODO Split Dictionaries and Bloomfilters Configuration so we dont have to load everything
// at once
Ok(bincode::serialize_into(handle, &self)?)
fn freeze_config(&mut self) -> Result<(), NippyJarError> {
Ok(bincode::serialize_into(File::create(self.config_path())?, &self)?)
}
}
impl<H> InclusionFilter for NippyJar<H>
where
H: Send + Sync + Serialize + for<'a> Deserialize<'a>,
{
impl<H: NippyJarHeader> InclusionFilter for NippyJar<H> {
fn add(&mut self, element: &[u8]) -> Result<(), NippyJarError> {
self.filter.as_mut().ok_or(NippyJarError::FilterMissing)?.add(element)
}
@@ -501,10 +422,7 @@ where
}
}
impl<H> PerfectHashingFunction for NippyJar<H>
where
H: Send + Sync + Serialize + for<'a> Deserialize<'a>,
{
impl<H: NippyJarHeader> PerfectHashingFunction for NippyJar<H> {
fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
self.phf.as_mut().ok_or(NippyJarError::PHFMissing)?.set_keys(keys)
}
@@ -514,31 +432,94 @@ where
}
}
/// Holds an `Arc` over a file and its associated mmap handle.
#[derive(Debug, Clone)]
pub struct MmapHandle {
/// File descriptor. Needs to be kept alive as long as the mmap handle.
/// Manages the reading of snapshot data using memory-mapped files.
///
/// Holds file and mmap descriptors of the data and offsets files of a snapshot.
#[derive(Debug)]
pub struct DataReader {
/// Data file descriptor. Needs to be kept alive as long as `data_mmap` handle.
#[allow(unused)]
file: Arc<File>,
/// Mmap handle.
mmap: Arc<Mmap>,
data_file: File,
/// Mmap handle for data.
data_mmap: Mmap,
/// Offset file descriptor. Needs to be kept alive as long as `offset_mmap` handle.
#[allow(unused)]
offset_file: File,
/// Mmap handle for offsets.
offset_mmap: Mmap,
/// Number of bytes that represent one offset.
offset_size: u64,
}
impl MmapHandle {
impl DataReader {
/// Reads the respective data and offsets file and returns [`DataReader`].
pub fn new(path: impl AsRef<Path>) -> Result<Self, NippyJarError> {
let file = File::open(path)?;
let data_file = File::open(path.as_ref())?;
// SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle.
let mmap = unsafe { Mmap::map(&file)? };
let data_mmap = unsafe { Mmap::map(&data_file)? };
Ok(Self { file: Arc::new(file), mmap: Arc::new(mmap) })
let offset_file = File::open(path.as_ref().with_extension(OFFSETS_FILE_EXTENSION))?;
// SAFETY: File is read-only and its descriptor is kept alive as long as the mmap handle.
let offset_mmap = unsafe { Mmap::map(&offset_file)? };
Ok(Self {
data_file,
data_mmap,
offset_file,
// First byte is the size of one offset in bytes
offset_size: offset_mmap[0] as u64,
offset_mmap,
})
}
}
impl Deref for MmapHandle {
type Target = Mmap;
fn deref(&self) -> &Self::Target {
&self.mmap
/// Returns the offset for the requested data index
///
/// NOTE(review): performs no bounds check — an out-of-range `index` will panic when
/// indexing the offsets mmap inside `offset_at`.
pub fn offset(&self, index: usize) -> u64 {
    // `+ 1` skips the first byte of the file, which stores the size of one offset.
    let from = index * self.offset_size as usize + 1;
    self.offset_at(from)
}
/// Returns the offset for the requested data index starting from the end
///
/// `reverse_offset(0)` yields the last offset in the file. Returns 0 when the file
/// holds no offsets (only the leading size byte is present).
pub fn reverse_offset(&self, index: usize) -> Result<u64, NippyJarError> {
    let offsets_file_size = self.offset_file.metadata()?.len() as usize;
    if offsets_file_size > 1 {
        // Walk back `index + 1` offsets from the end of the file.
        let from = offsets_file_size - self.offset_size as usize * (index + 1);
        Ok(self.offset_at(from))
    } else {
        // Only the size byte exists: there are no offsets at all.
        Ok(0)
    }
}
/// Returns total number of offsets in the file.
/// The size of one offset is determined by the file itself.
pub fn offsets_count(&self) -> Result<usize, NippyJarError> {
    // Subtract the leading size byte before dividing by the per-offset width.
    Ok((self.offset_file.metadata()?.len().saturating_sub(1) / self.offset_size) as usize)
}
/// Reads one offset-sized (determined by the offset file) u64 at the provided index.
///
/// `index` is a byte position into the offsets mmap, not an offset ordinal.
fn offset_at(&self, index: usize) -> u64 {
    // Offsets are little-endian and `offset_size` bytes wide; zero-extend into a u64.
    let mut buffer: [u8; 8] = [0; 8];
    buffer[..self.offset_size as usize]
        .copy_from_slice(&self.offset_mmap[index..(index + self.offset_size as usize)]);
    u64::from_le_bytes(buffer)
}
/// Returns number of bytes that represent one offset.
pub fn offset_size(&self) -> u64 {
    self.offset_size
}
/// Returns the underlying data as a slice of bytes for the provided range.
///
/// NOTE(review): no bounds check — an out-of-range `range` panics.
pub fn data(&self, range: Range<usize>) -> &[u8] {
    &self.data_mmap[range]
}
/// Returns total size of data
pub fn size(&self) -> usize {
    self.data_mmap.len()
}
}
@@ -546,7 +527,7 @@ impl Deref for MmapHandle {
mod tests {
use super::*;
use rand::{rngs::SmallRng, seq::SliceRandom, RngCore, SeedableRng};
use std::collections::HashSet;
use std::{collections::HashSet, fs::OpenOptions};
type ColumnResults<T> = Vec<ColumnResult<T>>;
type ColumnValues = Vec<Vec<u8>>;
@@ -610,7 +591,8 @@ mod tests {
nippy
.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
.unwrap();
let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
let loaded_nippy =
NippyJar::load_without_header(file_path.path()).unwrap().load_filters().unwrap();
assert_eq!(indexes, collect_indexes(&loaded_nippy));
};
@@ -654,7 +636,8 @@ mod tests {
assert!(InclusionFilter::add(&mut nippy, &col1[3]).is_ok());
nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap();
let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
let loaded_nippy =
NippyJar::load_without_header(file_path.path()).unwrap().load_filters().unwrap();
assert_eq!(nippy, loaded_nippy);
@@ -707,13 +690,13 @@ mod tests {
nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap();
let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
let loaded_nippy =
NippyJar::load_without_header(file_path.path()).unwrap().load_filters().unwrap();
assert_eq!(nippy.version, loaded_nippy.version);
assert_eq!(nippy.columns, loaded_nippy.columns);
assert_eq!(nippy.filter, loaded_nippy.filter);
assert_eq!(nippy.phf, loaded_nippy.phf);
assert_eq!(nippy.offsets_index, loaded_nippy.offsets_index);
assert_eq!(nippy.offsets, loaded_nippy.offsets);
assert_eq!(nippy.max_row_size, loaded_nippy.max_row_size);
assert_eq!(nippy.path, loaded_nippy.path);
@@ -751,6 +734,7 @@ mod tests {
nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap();
let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
let loaded_nippy = loaded_nippy.load_filters().unwrap();
assert_eq!(nippy, loaded_nippy);
if let Some(Compressors::Lz4(_)) = loaded_nippy.compressor() {
@@ -786,7 +770,8 @@ mod tests {
nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap();
let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
let loaded_nippy =
NippyJar::load_without_header(file_path.path()).unwrap().load_filters().unwrap();
assert_eq!(nippy, loaded_nippy);
if let Some(Compressors::Zstd(zstd)) = loaded_nippy.compressor() {
@@ -841,7 +826,8 @@ mod tests {
// Read file
{
let loaded_nippy = NippyJar::<BlockJarHeader>::load(file_path.path()).unwrap();
let loaded_nippy =
NippyJar::<BlockJarHeader>::load(file_path.path()).unwrap().load_filters().unwrap();
assert!(loaded_nippy.compressor().is_some());
assert!(loaded_nippy.filter.is_some());
@@ -911,7 +897,8 @@ mod tests {
// Read file
{
let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
let loaded_nippy =
NippyJar::load_without_header(file_path.path()).unwrap().load_filters().unwrap();
if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() {
let mut cursor = NippyJarCursor::new(&loaded_nippy).unwrap();
@@ -1013,4 +1000,294 @@ mod tests {
}
}
}
/// End-to-end exercise of the writer: append rows, prune them, re-append, then
/// verify crash recovery both before and in the middle of `commit()`.
#[test]
fn test_writer() {
    let (col1, col2) = test_data(None);
    let num_columns = 2;
    let file_path = tempfile::NamedTempFile::new().unwrap();
    append_two_rows(num_columns, file_path.path(), &col1, &col2);
    // Appends a third row and prunes two rows, to make sure we prune from memory and disk
    // offset list
    prune_rows(num_columns, file_path.path(), &col1, &col2);
    // Should be able to append new rows
    append_two_rows(num_columns, file_path.path(), &col1, &col2);
    // Simulate an unexpected shutdown before there's a chance to commit, and see that it
    // unwinds successfully
    test_append_consistency_no_commit(file_path.path(), &col1, &col2);
    // Simulate an unexpected shutdown during commit, and see that it unwinds successfully
    test_append_consistency_partial_commit(file_path.path(), &col1, &col2);
}
/// Verifies that a prune interrupted mid-way (data file truncated, offsets file not
/// yet updated) is healed by the writer's consistency check on the next open.
#[test]
fn test_pruner() {
    let (col1, col2) = test_data(None);
    let num_columns = 2;
    let num_rows = 2;
    // (missing_offsets, expected number of rows)
    // If a row wasn't fully pruned, then it should clear it up as well
    let missing_offsets_scenarios = [(1, 1), (2, 1), (3, 0)];
    for (missing_offsets, expected_rows) in missing_offsets_scenarios {
        let file_path = tempfile::NamedTempFile::new().unwrap();
        append_two_rows(num_columns, file_path.path(), &col1, &col2);
        simulate_interrupted_prune(num_columns, file_path.path(), num_rows, missing_offsets);
        let nippy = NippyJar::load_without_header(file_path.path()).unwrap();
        assert_eq!(nippy.rows, expected_rows);
    }
}
/// Simulates a crash *during* `commit()`: offsets were partially flushed (one offset
/// deliberately dropped) but the row count was never persisted. A fresh writer must
/// detect the mismatch and truncate both files back to the committed baseline.
fn test_append_consistency_partial_commit(
    file_path: &Path,
    col1: &[Vec<u8>],
    col2: &[Vec<u8>],
) {
    let mut nippy = NippyJar::load_without_header(file_path).unwrap();
    // Set the baseline that should be unwinded to
    let initial_rows = nippy.rows;
    let initial_data_size =
        File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize;
    let initial_offset_size =
        File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize;
    assert!(initial_data_size > 0);
    assert!(initial_offset_size > 0);
    // Appends a third row
    let mut writer = NippyJarWriter::new(&mut nippy).unwrap();
    writer.append_column(Some(Ok(&col1[2]))).unwrap();
    writer.append_column(Some(Ok(&col2[2]))).unwrap();
    // Makes sure it doesn't write the last one offset (which is the expected file data size)
    let _ = writer.offsets_mut().pop();
    // `commit_offsets` is not a pub function. we call it here to simulate the shutdown before
    // it can flush nippy.rows (config) to disk.
    writer.commit_offsets().unwrap();
    // Simulate an unexpected shutdown of the writer, before it can finish commit()
    drop(writer);
    let mut nippy = NippyJar::load_without_header(file_path).unwrap();
    assert_eq!(initial_rows, nippy.rows);
    // Data was written successfully
    let new_data_size =
        File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize;
    assert_eq!(new_data_size, initial_data_size + col1[2].len() + col2[2].len());
    // It should be + 16 (two columns were added), but there's a missing one (the one we pop)
    assert_eq!(
        initial_offset_size + 8,
        File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize
    );
    // Writer will execute a consistency check and verify first that the offset list on disk
    // doesn't match the nippy.rows, and prune it. Then, it will prune the data file
    // accordingly as well.
    let _writer = NippyJarWriter::new(&mut nippy).unwrap();
    assert_eq!(initial_rows, nippy.rows);
    assert_eq!(
        initial_offset_size,
        File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize
    );
    assert_eq!(
        initial_data_size,
        File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize
    );
    assert_eq!(initial_rows, nippy.rows);
}
/// Simulates a crash *before* `commit()`: column data reached the data file but no
/// offsets were flushed. A fresh writer must truncate the data file back to the last
/// committed offset.
fn test_append_consistency_no_commit(file_path: &Path, col1: &[Vec<u8>], col2: &[Vec<u8>]) {
    let mut nippy = NippyJar::load_without_header(file_path).unwrap();
    // Set the baseline that should be unwinded to
    let initial_rows = nippy.rows;
    let initial_data_size =
        File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize;
    let initial_offset_size =
        File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize;
    assert!(initial_data_size > 0);
    assert!(initial_offset_size > 0);
    // Appends a third row, so we have an offset list in memory, which is not flushed to disk,
    // while the data has been.
    let mut writer = NippyJarWriter::new(&mut nippy).unwrap();
    writer.append_column(Some(Ok(&col1[2]))).unwrap();
    writer.append_column(Some(Ok(&col2[2]))).unwrap();
    // Simulate an unexpected shutdown of the writer, before it can call commit()
    drop(writer);
    let mut nippy = NippyJar::load_without_header(file_path).unwrap();
    assert_eq!(initial_rows, nippy.rows);
    // Data was written successfully
    let new_data_size =
        File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize;
    assert_eq!(new_data_size, initial_data_size + col1[2].len() + col2[2].len());
    // Since offsets only get written on commit(), this remains the same
    assert_eq!(
        initial_offset_size,
        File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize
    );
    // Writer will execute a consistency check and verify that the data file has more data than
    // it should, and resets it to the last offset of the list (on disk here)
    let _writer = NippyJarWriter::new(&mut nippy).unwrap();
    assert_eq!(initial_rows, nippy.rows);
    assert_eq!(
        initial_data_size,
        File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize
    );
    assert_eq!(initial_rows, nippy.rows);
}
/// Appends one row to a freshly created jar, then reloads the jar and appends a second
/// row, asserting after each `commit()` that config, offsets file and data file agree.
fn append_two_rows(num_columns: usize, file_path: &Path, col1: &[Vec<u8>], col2: &[Vec<u8>]) {
    // Create and add 1 row
    {
        let mut nippy = NippyJar::new_without_header(num_columns, file_path);
        nippy.freeze_config().unwrap();
        assert_eq!(nippy.max_row_size, 0);
        assert_eq!(nippy.rows, 0);
        let mut writer = NippyJarWriter::new(&mut nippy).unwrap();
        assert_eq!(writer.column(), 0);
        writer.append_column(Some(Ok(&col1[0]))).unwrap();
        assert_eq!(writer.column(), 1);
        writer.append_column(Some(Ok(&col2[0]))).unwrap();
        // Adding last column of a row resets writer and updates jar config
        assert_eq!(writer.column(), 0);
        // One offset per column + 1 offset at the end representing the expected file data size
        assert_eq!(writer.offsets().len(), 3);
        let expected_data_file_size = *writer.offsets().last().unwrap();
        writer.commit().unwrap();
        assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len());
        assert_eq!(nippy.rows, 1);
        // 1 size byte + one 8-byte offset per column + trailing data-size offset
        assert_eq!(
            File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(),
            1 + num_columns as u64 * 8 + 8
        );
        assert_eq!(
            File::open(nippy.data_path()).unwrap().metadata().unwrap().len(),
            expected_data_file_size
        );
    }
    // Load and add 1 row
    {
        let mut nippy = NippyJar::load_without_header(file_path).unwrap();
        // Check if it was committed successfully
        assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len());
        assert_eq!(nippy.rows, 1);
        let mut writer = NippyJarWriter::new(&mut nippy).unwrap();
        assert_eq!(writer.column(), 0);
        writer.append_column(Some(Ok(&col1[1]))).unwrap();
        assert_eq!(writer.column(), 1);
        writer.append_column(Some(Ok(&col2[1]))).unwrap();
        // Adding last column of a row resets writer and updates jar config
        assert_eq!(writer.column(), 0);
        // One offset per column + 1 offset at the end representing the expected file data size
        assert_eq!(writer.offsets().len(), 3);
        let expected_data_file_size = *writer.offsets().last().unwrap();
        writer.commit().unwrap();
        assert_eq!(nippy.max_row_size, col1[0].len() + col2[0].len());
        assert_eq!(nippy.rows, 2);
        assert_eq!(
            File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(),
            1 + nippy.rows as u64 * num_columns as u64 * 8 + 8
        );
        assert_eq!(
            File::open(nippy.data_path()).unwrap().metadata().unwrap().len(),
            expected_data_file_size
        );
    }
}
/// Appends a third (uncommitted) row and prunes two rows, exercising pruning of both the
/// in-memory offset list and the on-disk one; a second prune then clears the jar entirely.
fn prune_rows(num_columns: usize, file_path: &Path, col1: &[Vec<u8>], col2: &[Vec<u8>]) {
    let mut nippy = NippyJar::load_without_header(file_path).unwrap();
    let mut writer = NippyJarWriter::new(&mut nippy).unwrap();
    // Appends a third row, so we have an offset list in memory, which is not flushed to disk
    writer.append_column(Some(Ok(&col1[2]))).unwrap();
    writer.append_column(Some(Ok(&col2[2]))).unwrap();
    // This should prune from the on-memory offset list and ondisk offset list
    writer.prune_rows(2).unwrap();
    assert_eq!(nippy.rows, 1);
    assert_eq!(
        File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len(),
        1 + nippy.rows as u64 * num_columns as u64 * 8 + 8
    );
    let expected_data_size = col1[0].len() + col2[0].len();
    assert_eq!(
        File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize,
        expected_data_size
    );
    let data_reader = nippy.open_data_reader().unwrap();
    // there are only two valid offsets. so index 2 actually represents the expected file
    // data size.
    assert_eq!(data_reader.offset(2), expected_data_size as u64);
    // This should prune from the ondisk offset list and clear the jar.
    let mut writer = NippyJarWriter::new(&mut nippy).unwrap();
    writer.prune_rows(1).unwrap();
    assert_eq!(nippy.rows, 0);
    assert_eq!(nippy.max_row_size, 0);
    assert_eq!(File::open(nippy.data_path()).unwrap().metadata().unwrap().len() as usize, 0);
    // Only the byte that indicates how many bytes per offset should be left
    assert_eq!(File::open(nippy.offsets_path()).unwrap().metadata().unwrap().len() as usize, 1);
}
/// Truncates `missing_offsets * 32` bytes from the data file without touching the offsets
/// file, mimicking a prune that died between the data truncation and the offsets update.
fn simulate_interrupted_prune(
    num_columns: usize,
    file_path: &Path,
    num_rows: u64,
    missing_offsets: u64,
) {
    let mut nippy = NippyJar::load_without_header(file_path).unwrap();
    let reader = nippy.open_data_reader().unwrap();
    let offsets_file =
        OpenOptions::new().read(true).write(true).open(nippy.offsets_path()).unwrap();
    let offsets_len = 1 + num_rows * num_columns as u64 * 8 + 8;
    assert_eq!(offsets_len, offsets_file.metadata().unwrap().len());
    let data_file = OpenOptions::new().read(true).write(true).open(nippy.data_path()).unwrap();
    // `reverse_offset(0)` is the last stored offset, i.e. the expected data file length.
    let data_len = reader.reverse_offset(0).unwrap();
    assert_eq!(data_len, data_file.metadata().unwrap().len());
    // each data column is 32 bytes long
    // by deleting from the data file, the `consistency_check` will go through both branches:
    // when the offset list wasn't updated after clearing the data (data_len > last
    // offset). fixing above, will lead to offset count not match the rows (*
    // columns) of the configuration file
    data_file.set_len(data_len - 32 * missing_offsets).unwrap();
    // runs the consistency check.
    let _ = NippyJarWriter::new(&mut nippy).unwrap();
}
}

View File

@@ -0,0 +1,408 @@
use crate::{compression::Compression, ColumnResult, NippyJar, NippyJarError, NippyJarHeader};
use std::{
cmp::Ordering,
fs::{File, OpenOptions},
io::{Read, Seek, SeekFrom, Write},
path::Path,
};
/// Size of one offset in bytes.
const OFFSET_SIZE_BYTES: u64 = 8;
/// Writer of [`NippyJar`]. Handles table data and offsets only.
///
/// Table data is written directly to disk, while offsets and configuration need to be flushed by
/// calling `commit()`.
///
/// ## Offset file layout
/// The first byte is the size of a single offset in bytes, `m`.
/// Then, the file contains `n` entries, each with a size of `m`. Each entry represents an offset,
/// except for the last entry, which represents both the total size of the data file, as well as the
/// next offset to write new data to.
///
/// ## Data file layout
/// The data file is represented just as a sequence of bytes of data without any delimiters
pub struct NippyJarWriter<'a, H> {
    /// Reference to the associated [`NippyJar`], containing all necessary configurations for data
    /// handling.
    jar: &'a mut NippyJar<H>,
    /// File handle to where the data is stored.
    data_file: File,
    /// File handle to where the offsets are stored.
    offsets_file: File,
    /// Temporary buffer to reuse when compressing data. Cleared after every finished row.
    tmp_buf: Vec<u8>,
    /// Used to find the maximum uncompressed size of a row in a jar. Reset per row.
    uncompressed_row_size: usize,
    /// Partial offset list which hasn't been flushed to disk. Drained by `commit_offsets()`.
    offsets: Vec<u64>,
    /// Column where writer is going to write next. Wraps back to 0 after the last column.
    column: usize,
}
impl<'a, H: NippyJarHeader> NippyJarWriter<'a, H> {
/// Creates a writer over `jar`, creating the data and offsets files if they don't exist.
///
/// If the files already existed, a consistency check runs first so any state left behind
/// by an interrupted `commit()` or prune is healed before new writes happen.
pub fn new(jar: &'a mut NippyJar<H>) -> Result<Self, NippyJarError> {
    let (data_file, offsets_file, is_created) =
        Self::create_or_open_files(jar.data_path(), &jar.offsets_path())?;
    let mut writer = Self {
        jar,
        data_file,
        offsets_file,
        // Scratch buffer reused across compressed column writes.
        tmp_buf: Vec::with_capacity(1_000_000),
        uncompressed_row_size: 0,
        offsets: Vec::with_capacity(1_000_000),
        column: 0,
    };
    // If we are opening a previously created jar, we need to check its consistency, and make
    // changes if necessary.
    if !is_created {
        writer.check_consistency_and_heal()?;
    }
    Ok(writer)
}
/// Opens (or creates) the data and offsets files in read+write mode.
///
/// Returns both handles seeked to EOF, plus a flag indicating whether either file had
/// to be created (i.e. this is a brand-new jar).
///
/// NOTE(review): the `exists()` checks and the subsequent `create`/`open` calls are not
/// atomic — a concurrent writer could race them. Presumably a jar has a single writer;
/// confirm against callers.
fn create_or_open_files(
    data: &Path,
    offsets: &Path,
) -> Result<(File, File, bool), NippyJarError> {
    let is_created = !data.exists() || !offsets.exists();
    let mut data_file = if !data.exists() {
        File::create(data)?
    } else {
        OpenOptions::new().read(true).write(true).open(data)?
    };
    data_file.seek(SeekFrom::End(0))?;
    let mut offsets_file = if !offsets.exists() {
        let mut offsets = File::create(offsets)?;
        // First byte of the offset file is the size of one offset in bytes
        offsets.write_all(&[OFFSET_SIZE_BYTES as u8])?;
        offsets.sync_all()?;
        offsets
    } else {
        OpenOptions::new().read(true).write(true).open(offsets)?
    };
    offsets_file.seek(SeekFrom::End(0))?;
    Ok((data_file, offsets_file, is_created))
}
/// Performs consistency checks on the [`NippyJar`] file and acts upon any issues:
/// * Is the offsets file size expected?
/// * Is the data file size expected?
///
/// This is based on the assumption that [`NippyJar`] configuration is **always** the last one
/// to be updated when something is written, as by the `commit()` function shows.
fn check_consistency_and_heal(&mut self) -> Result<(), NippyJarError> {
    let reader = self.jar.open_data_reader()?;
    // When an offset size is smaller than the initial (8), we are dealing with immutable
    // data.
    if reader.offset_size() != OFFSET_SIZE_BYTES {
        return Err(NippyJarError::FrozenJar)
    }
    let expected_offsets_file_size = 1 + // first byte is the size of one offset
        OFFSET_SIZE_BYTES * self.jar.rows as u64 * self.jar.columns as u64 + // `offset size * num rows * num columns`
        OFFSET_SIZE_BYTES; // expected size of the data file
    let actual_offsets_file_size = self.offsets_file.metadata()?.len();
    // Offsets configuration wasn't properly committed
    match expected_offsets_file_size.cmp(&actual_offsets_file_size) {
        Ordering::Less => {
            // Happened during an appending job
            // TODO: ideally we could truncate until the last offset of the last column of the
            // last row inserted
            self.offsets_file.set_len(expected_offsets_file_size)?;
        }
        Ordering::Greater => {
            // Happened during a pruning job
            // `num rows = (file size - 1 - size of one offset) / num columns`
            self.jar.rows = ((actual_offsets_file_size.
                saturating_sub(1). // first byte is the size of one offset
                saturating_sub(OFFSET_SIZE_BYTES) / // expected size of the data file
                (self.jar.columns as u64)) /
                OFFSET_SIZE_BYTES) as usize;
            // Freeze row count changed
            self.jar.freeze_config()?;
        }
        Ordering::Equal => {}
    }
    // last offset should match the data_file_len
    let last_offset = reader.reverse_offset(0)?;
    let data_file_len = self.data_file.metadata()?.len();
    // Offset list wasn't properly committed
    match last_offset.cmp(&data_file_len) {
        Ordering::Less => {
            // Happened during an appending job, so we need to truncate the data, since there's
            // no way to recover it.
            self.data_file.set_len(last_offset)?;
        }
        Ordering::Greater => {
            // Happened during a pruning job, so we need to reverse iterate offsets until we
            // find the matching one.
            for index in 0..reader.offsets_count()? {
                let offset = reader.reverse_offset(index + 1)?;
                if offset == data_file_len {
                    self.offsets_file.set_len(
                        self.offsets_file
                            .metadata()?
                            .len()
                            .saturating_sub(OFFSET_SIZE_BYTES * (index as u64 + 1)),
                    )?;
                    // Reader must be dropped before recursing: it mmaps the offsets file
                    // we just truncated.
                    drop(reader);
                    // Since we decrease the offset list, we need to check the consistency of
                    // `self.jar.rows` again
                    self.check_consistency_and_heal()?;
                    break
                }
            }
        }
        Ordering::Equal => {}
    }
    // Leave both handles positioned at EOF so subsequent appends go to the right place.
    self.offsets_file.seek(SeekFrom::End(0))?;
    self.data_file.seek(SeekFrom::End(0))?;
    Ok(())
}
/// Appends rows to data file. `fn commit()` should be called to flush offsets and config to
/// disk.
///
/// `column_values_per_row`: A vector where each element is a column's values in sequence,
/// corresponding to each row. The vector's length equals the number of columns.
pub fn append_rows(
    &mut self,
    column_values_per_row: Vec<impl IntoIterator<Item = ColumnResult<impl AsRef<[u8]>>>>,
    num_rows: u64,
) -> Result<(), NippyJarError> {
    // One live iterator per column; each yields that column's value for successive rows.
    let mut per_column_iters =
        column_values_per_row.into_iter().map(|col| col.into_iter()).collect::<Vec<_>>();

    // Row-major write order: pull the next value from every column iterator in turn.
    for _ in 0..num_rows {
        for col_iter in per_column_iters.iter_mut() {
            self.append_column(col_iter.next())?;
        }
    }

    Ok(())
}
/// Appends a column to data file. `fn commit()` should be called to flush offsets and config to
/// disk.
pub fn append_column(
    &mut self,
    column: Option<ColumnResult<impl AsRef<[u8]>>>,
) -> Result<(), NippyJarError> {
    match column {
        // A missing value means the caller under-supplied columns for the current row.
        None => Err(NippyJarError::UnexpectedMissingValue(
            self.jar.rows as u64,
            self.column as u64,
        )),
        Some(Err(err)) => Err(err.into()),
        Some(Ok(value)) => {
            // Very first column written through this writer: record where its data begins.
            if self.offsets.is_empty() {
                self.offsets.push(self.data_file.stream_position()?);
            }

            self.write_column(value.as_ref())?;

            // The trailing offset doubles as the data-file size until more data is
            // appended, at which point it becomes the next column's start offset.
            self.offsets.push(self.data_file.stream_position()?);
            Ok(())
        }
    }
}
/// Writes column to data file. If it's the last column of the row, call `finalize_row()`
fn write_column(&mut self, value: &[u8]) -> Result<(), NippyJarError> {
    // Track the uncompressed footprint so `finalize_row` can update `max_row_size`.
    self.uncompressed_row_size += value.len();

    match &self.jar.compressor {
        Some(compression) => {
            // Compress into the shared scratch buffer and write only the newly
            // produced bytes (the buffer may already hold earlier columns of this row).
            let start = self.tmp_buf.len();
            let written = compression.compress_to(value, &mut self.tmp_buf)?;
            self.data_file.write_all(&self.tmp_buf[start..start + written])?;
        }
        None => self.data_file.write_all(value)?,
    }

    self.column += 1;

    // Row complete: fold its stats into the jar config and reset per-row state.
    if self.column == self.jar.columns {
        self.finalize_row();
    }

    Ok(())
}
/// Prunes rows from data and offsets file and updates its configuration on disk
///
/// Prunes first from the in-memory (uncommitted) offset list, then from the on-disk
/// offset list, truncating the data file to match at each step.
pub fn prune_rows(&mut self, num_rows: usize) -> Result<(), NippyJarError> {
    // Each column of a row is one offset
    let num_offsets = num_rows * self.jar.columns;
    // Calculate the number of offsets to prune from in-memory list
    let offsets_prune_count = num_offsets.min(self.offsets.len().saturating_sub(1)); // last element is the expected size of the data file
    let remaining_to_prune = num_offsets.saturating_sub(offsets_prune_count);
    // Prune in-memory offsets if needed
    if offsets_prune_count > 0 {
        // Determine new length based on the offset to prune up to
        let new_len = self.offsets[(self.offsets.len() - 1) - offsets_prune_count]; // last element is the expected size of the data file
        self.offsets.truncate(self.offsets.len() - offsets_prune_count);
        // Truncate the data file to the new length
        self.data_file.set_len(new_len)?;
    }
    // Prune from on-disk offset list if there are still rows left to prune
    if remaining_to_prune > 0 {
        // Get the current length of the on-disk offset file
        let length = self.offsets_file.metadata()?.len();
        // Handle non-empty offset file
        if length > 1 {
            // first byte is reserved for `bytes_per_offset`, which is 8 initially.
            let num_offsets = (length - 1) / OFFSET_SIZE_BYTES;
            if remaining_to_prune as u64 > num_offsets {
                return Err(NippyJarError::InvalidPruning(
                    num_offsets,
                    remaining_to_prune as u64,
                ))
            }
            let new_num_offsets = num_offsets.saturating_sub(remaining_to_prune as u64);
            // If all rows are to be pruned
            if new_num_offsets <= 1 {
                // <= 1 because the one offset would actually be the expected file data size
                self.offsets_file.set_len(1)?;
                self.data_file.set_len(0)?;
            } else {
                // Calculate the new length for the on-disk offset list
                let new_len = 1 + new_num_offsets * OFFSET_SIZE_BYTES;
                // Seek to the position of the last offset
                self.offsets_file
                    .seek(SeekFrom::Start(new_len.saturating_sub(OFFSET_SIZE_BYTES)))?;
                // Read the last offset value
                let mut last_offset = [0u8; OFFSET_SIZE_BYTES as usize];
                self.offsets_file.read_exact(&mut last_offset)?;
                let last_offset = u64::from_le_bytes(last_offset);
                // Update the lengths of both the offsets and data files
                self.offsets_file.set_len(new_len)?;
                self.data_file.set_len(last_offset)?;
            }
        } else {
            return Err(NippyJarError::InvalidPruning(0, remaining_to_prune as u64))
        }
    }
    self.offsets_file.sync_all()?;
    self.data_file.sync_all()?;
    // Re-seek both handles to EOF so future appends continue after the truncated data.
    self.offsets_file.seek(SeekFrom::End(0))?;
    self.data_file.seek(SeekFrom::End(0))?;
    self.jar.rows = self.jar.rows.saturating_sub(num_rows);
    if self.jar.rows == 0 {
        self.jar.max_row_size = 0;
    }
    // Persist row count / max row size last, mirroring the ordering `commit()` relies on.
    self.jar.freeze_config()?;
    Ok(())
}
/// Updates [`NippyJar`] with the new row count and maximum uncompressed row size, while
/// resetting internal fields.
fn finalize_row(&mut self) {
    // `max_row_size` tracks the largest *uncompressed* row ever written to this jar.
    self.jar.max_row_size = self.jar.max_row_size.max(self.uncompressed_row_size);
    self.jar.rows += 1;
    // Reset per-row scratch state for the next row.
    self.tmp_buf.clear();
    self.uncompressed_row_size = 0;
    self.column = 0;
}
/// Commits configuration and offsets to disk. It drains the internal offset list.
pub fn commit(&mut self) -> Result<(), NippyJarError> {
    // Data must be durable before the offsets that point into it.
    self.data_file.sync_all()?;
    self.commit_offsets()?;
    // Flushes `max_row_size` and total `rows` to disk. This must be last: the
    // consistency check assumes the config is the final thing persisted.
    self.jar.freeze_config()?;
    Ok(())
}
/// Flushes offsets to disk.
pub(crate) fn commit_offsets(&mut self) -> Result<(), NippyJarError> {
    // The last offset on disk can be the first offset of `self.offsets` given how
    // `append_column()` works alongside commit. So we need to skip it.
    let mut last_offset_ondisk = None;
    if self.offsets_file.metadata()?.len() > 1 {
        // Read the final offset currently persisted so we can de-duplicate it below.
        self.offsets_file.seek(SeekFrom::End(-(OFFSET_SIZE_BYTES as i64)))?;
        let mut buf = [0u8; OFFSET_SIZE_BYTES as usize];
        self.offsets_file.read_exact(&mut buf)?;
        last_offset_ondisk = Some(u64::from_le_bytes(buf));
    }
    self.offsets_file.seek(SeekFrom::End(0))?;
    // Appends new offsets to disk
    for offset in self.offsets.drain(..) {
        // Only the very first drained offset can match the on-disk tail; `take()`
        // ensures the comparison happens at most once.
        if let Some(last_offset_ondisk) = last_offset_ondisk.take() {
            if last_offset_ondisk == offset {
                continue
            }
        }
        self.offsets_file.write_all(&offset.to_le_bytes())?;
    }
    self.offsets_file.sync_all()?;
    Ok(())
}
/// Test-only: returns the column index the writer will write next.
#[cfg(test)]
pub fn column(&self) -> usize {
    self.column
}
/// Test-only: returns the in-memory (uncommitted) offset list.
#[cfg(test)]
pub fn offsets(&self) -> &[u64] {
    &self.offsets
}
/// Test-only: mutable access to the in-memory offset list, used by tests to simulate
/// partially written offsets.
#[cfg(test)]
pub fn offsets_mut(&mut self) -> &mut Vec<u64> {
    &mut self.offsets
}
}

View File

@@ -35,6 +35,7 @@ itertools.workspace = true
pin-project.workspace = true
parking_lot.workspace = true
dashmap = { version = "5.5", features = ["inline"] }
strum.workspace = true
# test-utils
alloy-rlp = { workspace = true, optional = true }

View File

@@ -36,3 +36,19 @@ pub use chain::{Chain, DisplayBlocksChain};
pub mod bundle_state;
pub use bundle_state::{BundleStateWithReceipts, OriginalValuesKnown, StateChanges, StateReverts};
/// Converts any `RangeBounds<u64>` into a half-open `Range<u64>`.
///
/// Unbounded starts become `0` and unbounded ends become `u64::MAX`. Inclusive ends
/// (and exclusive starts) are shifted by one; `saturating_add` avoids the overflow
/// panic (debug) / wrap-to-zero (release) the plain `v + 1` had when a bound is
/// `u64::MAX` — a `Range<u64>` cannot represent an inclusive `u64::MAX` end anyway.
pub(crate) fn to_range<R: std::ops::RangeBounds<u64>>(bounds: R) -> std::ops::Range<u64> {
    let start = match bounds.start_bound() {
        std::ops::Bound::Included(&v) => v,
        std::ops::Bound::Excluded(&v) => v.saturating_add(1),
        std::ops::Bound::Unbounded => 0,
    };

    let end = match bounds.end_bound() {
        std::ops::Bound::Included(&v) => v.saturating_add(1),
        std::ops::Bound::Excluded(&v) => v,
        std::ops::Bound::Unbounded => u64::MAX,
    };

    start..end
}

View File

@@ -83,12 +83,12 @@ impl<DB> ProviderFactory<DB> {
mut self,
snapshots_path: PathBuf,
highest_snapshot_tracker: watch::Receiver<Option<HighestSnapshots>>,
) -> Self {
) -> ProviderResult<Self> {
self.snapshot_provider = Some(Arc::new(
SnapshotProvider::new(snapshots_path)
SnapshotProvider::new(snapshots_path)?
.with_highest_tracker(Some(highest_snapshot_tracker)),
));
self
Ok(self)
}
/// Returns reference to the underlying database.
@@ -401,6 +401,13 @@ impl<DB: Database> ReceiptProvider for ProviderFactory<DB> {
fn receipts_by_block(&self, block: BlockHashOrNumber) -> ProviderResult<Option<Vec<Receipt>>> {
self.provider()?.receipts_by_block(block)
}
fn receipts_by_tx_range(
&self,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>> {
self.provider()?.receipts_by_tx_range(range)
}
}
impl<DB: Database> WithdrawalsProvider for ProviderFactory<DB> {

View File

@@ -1,6 +1,7 @@
use crate::{
bundle_state::{BundleStateInit, BundleStateWithReceipts, HashedStateChanges, RevertsInit},
providers::{database::metrics, SnapshotProvider},
to_range,
traits::{
AccountExtReader, BlockSource, ChangeSetReader, ReceiptProvider, StageCheckpointWriter,
},
@@ -39,9 +40,9 @@ use reth_primitives::{
trie::Nibbles,
Account, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber, BlockWithSenders,
ChainInfo, ChainSpec, GotExpected, Hardfork, Head, Header, PruneCheckpoint, PruneModes,
PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, StorageEntry,
TransactionMeta, TransactionSigned, TransactionSignedEcRecovered, TransactionSignedNoHash,
TxHash, TxNumber, Withdrawal, B256, U256,
PruneSegment, Receipt, SealedBlock, SealedBlockWithSenders, SealedHeader, SnapshotSegment,
StorageEntry, TransactionMeta, TransactionSigned, TransactionSignedEcRecovered,
TransactionSignedNoHash, TxHash, TxNumber, Withdrawal, B256, U256,
};
use reth_trie::{
hashed_cursor::HashedPostState, prefix_set::PrefixSetMut, updates::TrieUpdates, StateRoot,
@@ -117,7 +118,7 @@ impl<TX: DbTx> DatabaseProvider<TX> {
&self,
range: impl RangeBounds<T::Key>,
mut f: impl FnMut(T::Value) -> Result<R, DatabaseError>,
) -> Result<Vec<R>, DatabaseError> {
) -> ProviderResult<Vec<R>> {
self.cursor_read_collect_with_key::<T, R>(range, |_, v| f(v))
}
@@ -125,7 +126,7 @@ impl<TX: DbTx> DatabaseProvider<TX> {
&self,
range: impl RangeBounds<T::Key>,
f: impl FnMut(T::Key, T::Value) -> Result<R, DatabaseError>,
) -> Result<Vec<R>, DatabaseError> {
) -> ProviderResult<Vec<R>> {
let capacity = match range_size_hint(&range) {
Some(0) | None => return Ok(Vec::new()),
Some(capacity) => capacity,
@@ -139,7 +140,7 @@ impl<TX: DbTx> DatabaseProvider<TX> {
cursor: &mut impl DbCursorRO<T>,
range: impl RangeBounds<T::Key>,
mut f: impl FnMut(T::Value) -> Result<R, DatabaseError>,
) -> Result<Vec<R>, DatabaseError> {
) -> ProviderResult<Vec<R>> {
let capacity = range_size_hint(&range).unwrap_or(0);
self.cursor_collect_with_capacity(cursor, range, capacity, |_, v| f(v))
}
@@ -150,7 +151,7 @@ impl<TX: DbTx> DatabaseProvider<TX> {
range: impl RangeBounds<T::Key>,
capacity: usize,
mut f: impl FnMut(T::Key, T::Value) -> Result<R, DatabaseError>,
) -> Result<Vec<R>, DatabaseError> {
) -> ProviderResult<Vec<R>> {
let mut items = Vec::with_capacity(capacity);
for entry in cursor.walk_range(range)? {
let (key, value) = entry?;
@@ -245,6 +246,113 @@ impl<TX: DbTx> DatabaseProvider<TX> {
.walk(Some(T::Key::default()))?
.collect::<Result<Vec<_>, DatabaseError>>()
}
/// Gets data within a specified range, potentially spanning different snapshots and database.
///
/// # Arguments
/// * `segment` - The segment of the snapshot to query.
/// * `block_range` - The range of data to fetch.
/// * `fetch_from_snapshot` - A function to fetch data from the snapshot.
/// * `fetch_from_database` - A function to fetch data from the database.
/// * `predicate` - A function used to evaluate each item in the fetched data. Fetching is
/// terminated when this function returns false, thereby filtering the data based on the
/// provided condition.
fn get_range_with_snapshot<T, P, FS, FD>(
    &self,
    segment: SnapshotSegment,
    mut block_or_tx_range: Range<u64>,
    fetch_from_snapshot: FS,
    mut fetch_from_database: FD,
    mut predicate: P,
) -> ProviderResult<Vec<T>>
where
    FS: Fn(&SnapshotProvider, Range<u64>, &mut P) -> ProviderResult<Vec<T>>,
    FD: FnMut(Range<u64>, P) -> ProviderResult<Vec<T>>,
    P: FnMut(&T) -> bool,
{
    let mut data = Vec::new();
    if let Some(snapshot_provider) = &self.snapshot_provider {
        // If there is, check the maximum block or transaction number of the segment.
        if let Some(snapshot_upper_bound) = match segment {
            SnapshotSegment::Headers => snapshot_provider.get_highest_snapshot_block(segment),
            SnapshotSegment::Transactions | SnapshotSegment::Receipts => {
                snapshot_provider.get_highest_snapshot_tx(segment)
            }
        } {
            if block_or_tx_range.start <= snapshot_upper_bound {
                // Serve the portion covered by snapshots, then shrink the range so the
                // database only handles the remainder.
                let end = block_or_tx_range.end.min(snapshot_upper_bound + 1);
                data.extend(fetch_from_snapshot(
                    snapshot_provider,
                    block_or_tx_range.start..end,
                    &mut predicate,
                )?);
                block_or_tx_range.start = end;
            }
        }
    }
    // Anything past the snapshot upper bound (or everything, when no snapshot provider is
    // configured) comes from the database.
    if block_or_tx_range.end > block_or_tx_range.start {
        data.extend(fetch_from_database(block_or_tx_range, predicate)?)
    }
    Ok(data)
}
/// Retrieves data from the database or snapshot, wherever it's available.
///
/// # Arguments
/// * `segment` - The segment of the snapshot to check against.
/// * `index_key` - Requested index key, usually a block or transaction number.
/// * `fetch_from_snapshot` - A closure that defines how to fetch the data from the snapshot
/// provider.
/// * `fetch_from_database` - A closure that defines how to fetch the data from the database
/// when the snapshot doesn't contain the required data or is not available.
fn get_with_snapshot<T, FS, FD>(
&self,
segment: SnapshotSegment,
number: u64,
fetch_from_snapshot: FS,
fetch_from_database: FD,
) -> ProviderResult<Option<T>>
where
FS: Fn(&SnapshotProvider) -> ProviderResult<Option<T>>,
FD: Fn() -> ProviderResult<Option<T>>,
{
if let Some(provider) = &self.snapshot_provider {
// If there is, check the maximum block or transaction number of the segment.
let snapshot_upper_bound = match segment {
SnapshotSegment::Headers => provider.get_highest_snapshot_block(segment),
SnapshotSegment::Transactions | SnapshotSegment::Receipts => {
provider.get_highest_snapshot_tx(segment)
}
};
if snapshot_upper_bound
.map_or(false, |snapshot_upper_bound| snapshot_upper_bound >= number)
{
return fetch_from_snapshot(provider);
}
}
fetch_from_database()
}
fn transactions_by_tx_range_with_cursor<C>(
&self,
range: impl RangeBounds<TxNumber>,
cursor: &mut C,
) -> ProviderResult<Vec<TransactionSignedNoHash>>
where
C: DbCursorRO<tables::Transactions>,
{
self.get_range_with_snapshot(
SnapshotSegment::Transactions,
to_range(range),
|snapshot, range, _| snapshot.transactions_by_tx_range(range),
|range, _| self.cursor_collect(cursor, range, Ok),
|_| true,
)
}
}
impl<TX: DbTxMut + DbTx> DatabaseProvider<TX> {
@@ -981,7 +1089,12 @@ impl<TX: DbTx> HeaderProvider for DatabaseProvider<TX> {
}
fn header_by_number(&self, num: BlockNumber) -> ProviderResult<Option<Header>> {
Ok(self.tx.get::<tables::Headers>(num)?)
self.get_with_snapshot(
SnapshotSegment::Headers,
num,
|snapshot| snapshot.header_by_number(num),
|| Ok(self.tx.get::<tables::Headers>(num)?),
)
}
fn header_td(&self, block_hash: &BlockHash) -> ProviderResult<Option<U256>> {
@@ -999,52 +1112,81 @@ impl<TX: DbTx> HeaderProvider for DatabaseProvider<TX> {
return Ok(Some(td));
}
Ok(self.tx.get::<tables::HeaderTD>(number)?.map(|td| td.0))
self.get_with_snapshot(
SnapshotSegment::Headers,
number,
|snapshot| snapshot.header_td_by_number(number),
|| Ok(self.tx.get::<tables::HeaderTD>(number)?.map(|td| td.0)),
)
}
fn headers_range(&self, range: impl RangeBounds<BlockNumber>) -> ProviderResult<Vec<Header>> {
let mut cursor = self.tx.cursor_read::<tables::Headers>()?;
cursor
.walk_range(range)?
.map(|result| result.map(|(_, header)| header).map_err(Into::into))
.collect::<ProviderResult<Vec<_>>>()
self.get_range_with_snapshot(
SnapshotSegment::Headers,
to_range(range),
|snapshot, range, _| snapshot.headers_range(range),
|range, _| {
self.cursor_read_collect::<tables::Headers, _>(range, Ok).map_err(Into::into)
},
|_| true,
)
}
fn sealed_header(&self, number: BlockNumber) -> ProviderResult<Option<SealedHeader>> {
if let Some(header) = self.header_by_number(number)? {
let hash = self
.block_hash(number)?
.ok_or_else(|| ProviderError::HeaderNotFound(number.into()))?;
Ok(Some(header.seal(hash)))
} else {
Ok(None)
}
self.get_with_snapshot(
SnapshotSegment::Headers,
number,
|snapshot| snapshot.sealed_header(number),
|| {
if let Some(header) = self.header_by_number(number)? {
let hash = self
.block_hash(number)?
.ok_or_else(|| ProviderError::HeaderNotFound(number.into()))?;
Ok(Some(header.seal(hash)))
} else {
Ok(None)
}
},
)
}
fn sealed_headers_while(
&self,
range: impl RangeBounds<BlockNumber>,
mut predicate: impl FnMut(&SealedHeader) -> bool,
predicate: impl FnMut(&SealedHeader) -> bool,
) -> ProviderResult<Vec<SealedHeader>> {
let mut headers = vec![];
for entry in self.tx.cursor_read::<tables::Headers>()?.walk_range(range)? {
let (number, header) = entry?;
let hash = self
.block_hash(number)?
.ok_or_else(|| ProviderError::HeaderNotFound(number.into()))?;
let sealed = header.seal(hash);
if !predicate(&sealed) {
break;
}
headers.push(sealed);
}
Ok(headers)
self.get_range_with_snapshot(
SnapshotSegment::Headers,
to_range(range),
|snapshot, range, predicate| snapshot.sealed_headers_while(range, predicate),
|range, mut predicate| {
let mut headers = vec![];
for entry in self.tx.cursor_read::<tables::Headers>()?.walk_range(range)? {
let (number, header) = entry?;
let hash = self
.block_hash(number)?
.ok_or_else(|| ProviderError::HeaderNotFound(number.into()))?;
let sealed = header.seal(hash);
if !predicate(&sealed) {
break
}
headers.push(sealed);
}
Ok(headers)
},
predicate,
)
}
}
impl<TX: DbTx> BlockHashReader for DatabaseProvider<TX> {
fn block_hash(&self, number: u64) -> ProviderResult<Option<B256>> {
Ok(self.tx.get::<tables::CanonicalHeaders>(number)?)
self.get_with_snapshot(
SnapshotSegment::Headers,
number,
|snapshot| snapshot.block_hash(number),
|| Ok(self.tx.get::<tables::CanonicalHeaders>(number)?),
)
}
fn canonical_hashes_range(
@@ -1052,12 +1194,16 @@ impl<TX: DbTx> BlockHashReader for DatabaseProvider<TX> {
start: BlockNumber,
end: BlockNumber,
) -> ProviderResult<Vec<B256>> {
let range = start..end;
let mut cursor = self.tx.cursor_read::<tables::CanonicalHeaders>()?;
cursor
.walk_range(range)?
.map(|result| result.map(|(_, hash)| hash).map_err(Into::into))
.collect::<ProviderResult<Vec<_>>>()
self.get_range_with_snapshot(
SnapshotSegment::Headers,
start..end,
|snapshot, range, _| snapshot.canonical_hashes_range(range.start, range.end),
|range, _| {
self.cursor_read_collect::<tables::CanonicalHeaders, _>(range, Ok)
.map_err(Into::into)
},
|_| true,
)
}
}
@@ -1227,7 +1373,14 @@ impl<TX: DbTx> BlockReader for DatabaseProvider<TX> {
// we skip the block.
if let Some((_, block_body_indices)) = block_body_cursor.seek_exact(num)? {
let tx_range = block_body_indices.tx_num_range();
let body = self.cursor_collect(&mut tx_cursor, tx_range, |tx| Ok(tx.into()))?;
let body = if tx_range.is_empty() {
Vec::new()
} else {
self.transactions_by_tx_range_with_cursor(tx_range, &mut tx_cursor)?
.into_iter()
.map(Into::into)
.collect()
};
// If we are past shanghai, then all blocks should have a withdrawal list,
// even if empty
@@ -1263,53 +1416,63 @@ impl<TX: DbTx> TransactionsProviderExt for DatabaseProvider<TX> {
&self,
tx_range: Range<TxNumber>,
) -> ProviderResult<Vec<(TxHash, TxNumber)>> {
let mut tx_cursor = self.tx.cursor_read::<tables::Transactions>()?;
let tx_range_size = tx_range.clone().count();
let tx_walker = tx_cursor.walk_range(tx_range)?;
self.get_range_with_snapshot(
SnapshotSegment::Transactions,
tx_range,
|snapshot, range, _| snapshot.transaction_hashes_by_range(range),
|tx_range, _| {
let mut tx_cursor = self.tx.cursor_read::<tables::Transactions>()?;
let tx_range_size = tx_range.clone().count();
let tx_walker = tx_cursor.walk_range(tx_range)?;
let chunk_size = (tx_range_size / rayon::current_num_threads()).max(1);
let mut channels = Vec::with_capacity(chunk_size);
let mut transaction_count = 0;
let chunk_size = (tx_range_size / rayon::current_num_threads()).max(1);
let mut channels = Vec::with_capacity(chunk_size);
let mut transaction_count = 0;
#[inline]
fn calculate_hash(
entry: Result<(TxNumber, TransactionSignedNoHash), DatabaseError>,
rlp_buf: &mut Vec<u8>,
) -> Result<(B256, TxNumber), Box<ProviderError>> {
let (tx_id, tx) = entry.map_err(|e| Box::new(e.into()))?;
tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false);
Ok((keccak256(rlp_buf), tx_id))
}
for chunk in &tx_walker.chunks(chunk_size) {
let (tx, rx) = mpsc::channel();
channels.push(rx);
// Note: Unfortunate side-effect of how chunk is designed in itertools (it is not Send)
let chunk: Vec<_> = chunk.collect();
transaction_count += chunk.len();
// Spawn the task onto the global rayon pool
// This task will send the results through the channel after it has calculated the hash.
rayon::spawn(move || {
let mut rlp_buf = Vec::with_capacity(128);
for entry in chunk {
rlp_buf.clear();
let _ = tx.send(calculate_hash(entry, &mut rlp_buf));
#[inline]
fn calculate_hash(
entry: Result<(TxNumber, TransactionSignedNoHash), DatabaseError>,
rlp_buf: &mut Vec<u8>,
) -> Result<(B256, TxNumber), Box<ProviderError>> {
let (tx_id, tx) = entry.map_err(|e| Box::new(e.into()))?;
tx.transaction.encode_with_signature(&tx.signature, rlp_buf, false);
Ok((keccak256(rlp_buf), tx_id))
}
});
}
let mut tx_list = Vec::with_capacity(transaction_count);
// Iterate over channels and append the tx hashes unsorted
for channel in channels {
while let Ok(tx) = channel.recv() {
let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?;
tx_list.push((tx_hash, tx_id));
}
}
for chunk in &tx_walker.chunks(chunk_size) {
let (tx, rx) = mpsc::channel();
channels.push(rx);
Ok(tx_list)
// Note: Unfortunate side-effect of how chunk is designed in itertools (it is
// not Send)
let chunk: Vec<_> = chunk.collect();
transaction_count += chunk.len();
// Spawn the task onto the global rayon pool
// This task will send the results through the channel after it has calculated
// the hash.
rayon::spawn(move || {
let mut rlp_buf = Vec::with_capacity(128);
for entry in chunk {
rlp_buf.clear();
let _ = tx.send(calculate_hash(entry, &mut rlp_buf));
}
});
}
let mut tx_list = Vec::with_capacity(transaction_count);
// Iterate over channels and append the tx hashes unsorted
for channel in channels {
while let Ok(tx) = channel.recv() {
let (tx_hash, tx_id) = tx.map_err(|boxed| *boxed)?;
tx_list.push((tx_hash, tx_id));
}
}
Ok(tx_list)
},
|_| true,
)
}
}
@@ -1321,14 +1484,24 @@ impl<TX: DbTx> TransactionsProvider for DatabaseProvider<TX> {
}
fn transaction_by_id(&self, id: TxNumber) -> ProviderResult<Option<TransactionSigned>> {
Ok(self.tx.get::<tables::Transactions>(id)?.map(Into::into))
self.get_with_snapshot(
SnapshotSegment::Transactions,
id,
|snapshot| snapshot.transaction_by_id(id),
|| Ok(self.tx.get::<tables::Transactions>(id)?.map(Into::into)),
)
}
fn transaction_by_id_no_hash(
&self,
id: TxNumber,
) -> ProviderResult<Option<TransactionSignedNoHash>> {
Ok(self.tx.get::<tables::Transactions>(id)?)
self.get_with_snapshot(
SnapshotSegment::Transactions,
id,
|snapshot| snapshot.transaction_by_id_no_hash(id),
|| Ok(self.tx.get::<tables::Transactions>(id)?),
)
}
fn transaction_by_hash(&self, hash: TxHash) -> ProviderResult<Option<TransactionSigned>> {
@@ -1396,14 +1569,21 @@ impl<TX: DbTx> TransactionsProvider for DatabaseProvider<TX> {
&self,
id: BlockHashOrNumber,
) -> ProviderResult<Option<Vec<TransactionSigned>>> {
let mut tx_cursor = self.tx.cursor_read::<tables::Transactions>()?;
if let Some(block_number) = self.convert_hash_or_number(id)? {
if let Some(body) = self.block_body_indices(block_number)? {
return self
.cursor_read_collect::<tables::Transactions, _>(body.tx_num_range(), |tx| {
Ok(tx.into())
})
.map(Some)
.map_err(Into::into);
let tx_range = body.tx_num_range();
return if tx_range.is_empty() {
Ok(Some(Vec::new()))
} else {
Ok(Some(
self.transactions_by_tx_range_with_cursor(tx_range, &mut tx_cursor)?
.into_iter()
.map(Into::into)
.collect(),
))
}
}
}
Ok(None)
@@ -1414,17 +1594,33 @@ impl<TX: DbTx> TransactionsProvider for DatabaseProvider<TX> {
range: impl RangeBounds<BlockNumber>,
) -> ProviderResult<Vec<Vec<TransactionSigned>>> {
let mut tx_cursor = self.tx.cursor_read::<tables::Transactions>()?;
self.cursor_read_collect::<tables::BlockBodyIndices, _>(range, |body| {
self.cursor_collect(&mut tx_cursor, body.tx_num_range(), |tx| Ok(tx.into()))
})
.map_err(Into::into)
let mut results = Vec::new();
let mut body_cursor = self.tx.cursor_read::<tables::BlockBodyIndices>()?;
for entry in body_cursor.walk_range(range)? {
let (_, body) = entry?;
let tx_num_range = body.tx_num_range();
if tx_num_range.is_empty() {
results.push(Vec::new());
} else {
results.push(
self.transactions_by_tx_range_with_cursor(tx_num_range, &mut tx_cursor)?
.into_iter()
.map(Into::into)
.collect(),
);
}
}
Ok(results)
}
fn transactions_by_tx_range(
&self,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<TransactionSignedNoHash>> {
self.cursor_read_collect::<tables::Transactions, _>(range, Ok).map_err(Into::into)
self.transactions_by_tx_range_with_cursor(
range,
&mut self.tx.cursor_read::<tables::Transactions>()?,
)
}
fn senders_by_tx_range(
@@ -1441,7 +1637,12 @@ impl<TX: DbTx> TransactionsProvider for DatabaseProvider<TX> {
impl<TX: DbTx> ReceiptProvider for DatabaseProvider<TX> {
fn receipt(&self, id: TxNumber) -> ProviderResult<Option<Receipt>> {
Ok(self.tx.get::<tables::Receipts>(id)?)
self.get_with_snapshot(
SnapshotSegment::Receipts,
id,
|snapshot| snapshot.receipt(id),
|| Ok(self.tx.get::<tables::Receipts>(id)?),
)
}
fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult<Option<Receipt>> {
@@ -1455,14 +1656,31 @@ impl<TX: DbTx> ReceiptProvider for DatabaseProvider<TX> {
fn receipts_by_block(&self, block: BlockHashOrNumber) -> ProviderResult<Option<Vec<Receipt>>> {
if let Some(number) = self.convert_hash_or_number(block)? {
if let Some(body) = self.block_body_indices(number)? {
return self
.cursor_read_collect::<tables::Receipts, _>(body.tx_num_range(), Ok)
.map(Some)
.map_err(Into::into);
let tx_range = body.tx_num_range();
return if tx_range.is_empty() {
Ok(Some(Vec::new()))
} else {
self.receipts_by_tx_range(tx_range).map(Some)
}
}
}
Ok(None)
}
fn receipts_by_tx_range(
&self,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>> {
self.get_range_with_snapshot(
SnapshotSegment::Receipts,
to_range(range),
|snapshot, range, _| snapshot.receipts_by_tx_range(range),
|range, _| {
self.cursor_read_collect::<tables::Receipts, _>(range, Ok).map_err(Into::into)
},
|_| true,
)
}
}
impl<TX: DbTx> WithdrawalsProvider for DatabaseProvider<TX> {

View File

@@ -374,6 +374,13 @@ where
fn receipts_by_block(&self, block: BlockHashOrNumber) -> ProviderResult<Option<Vec<Receipt>>> {
self.database.provider()?.receipts_by_block(block)
}
fn receipts_by_tx_range(
&self,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>> {
self.database.provider()?.receipts_by_tx_range(range)
}
}
impl<DB, Tree> ReceiptProviderIdExt for BlockchainProvider<DB, Tree>
where

View File

@@ -1,6 +1,7 @@
use super::LoadedJarRef;
use crate::{
BlockHashReader, BlockNumReader, HeaderProvider, ReceiptProvider, TransactionsProvider,
to_range, BlockHashReader, BlockNumReader, HeaderProvider, ReceiptProvider,
TransactionsProvider,
};
use reth_db::{
codecs::CompactU256,
@@ -11,7 +12,7 @@ use reth_primitives::{
Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo, Header, Receipt, SealedHeader,
TransactionMeta, TransactionSigned, TransactionSignedNoHash, TxHash, TxNumber, B256, U256,
};
use std::ops::{Deref, Range, RangeBounds};
use std::ops::{Deref, RangeBounds};
/// Provider over a specific `NippyJar` and range.
#[derive(Debug)]
@@ -285,20 +286,20 @@ impl<'a> ReceiptProvider for SnapshotJarProvider<'a> {
// provider with `receipt()` instead for each
Err(ProviderError::UnsupportedProvider)
}
}
fn to_range<R: RangeBounds<u64>>(bounds: R) -> Range<u64> {
let start = match bounds.start_bound() {
std::ops::Bound::Included(&v) => v,
std::ops::Bound::Excluded(&v) => v + 1,
std::ops::Bound::Unbounded => 0,
};
let end = match bounds.end_bound() {
std::ops::Bound::Included(&v) => v + 1,
std::ops::Bound::Excluded(&v) => v,
std::ops::Bound::Unbounded => u64::MAX,
};
start..end
fn receipts_by_tx_range(
&self,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>> {
let range = to_range(range);
let mut cursor = self.cursor()?;
let mut receipts = Vec::with_capacity((range.end - range.start) as usize);
for num in range {
if let Some(tx) = cursor.get_one::<ReceiptMask<Receipt>>(num.into())? {
receipts.push(tx)
}
}
Ok(receipts)
}
}

View File

@@ -1,22 +1,27 @@
use super::{LoadedJar, SnapshotJarProvider};
use crate::{BlockHashReader, BlockNumReader, HeaderProvider, TransactionsProvider};
use crate::{
to_range, BlockHashReader, BlockNumReader, BlockReader, BlockSource, HeaderProvider,
ReceiptProvider, TransactionVariant, TransactionsProvider, TransactionsProviderExt,
WithdrawalsProvider,
};
use dashmap::DashMap;
use parking_lot::RwLock;
use reth_db::{
codecs::CompactU256,
snapshot::{HeaderMask, TransactionMask},
models::StoredBlockBodyIndices,
snapshot::{iter_snapshots, HeaderMask, ReceiptMask, SnapshotCursor, TransactionMask},
};
use reth_interfaces::provider::{ProviderError, ProviderResult};
use reth_nippy_jar::NippyJar;
use reth_primitives::{
snapshot::HighestSnapshots, Address, BlockHash, BlockHashOrNumber, BlockNumber, ChainInfo,
Header, SealedHeader, SnapshotSegment, TransactionMeta, TransactionSigned,
TransactionSignedNoHash, TxHash, TxNumber, B256, U256,
snapshot::HighestSnapshots, Address, Block, BlockHash, BlockHashOrNumber, BlockNumber,
BlockWithSenders, ChainInfo, Header, Receipt, SealedBlock, SealedBlockWithSenders,
SealedHeader, SnapshotSegment, TransactionMeta, TransactionSigned, TransactionSignedNoHash,
TxHash, TxNumber, Withdrawal, B256, U256,
};
use revm::primitives::HashMap;
use std::{
collections::BTreeMap,
ops::{RangeBounds, RangeInclusive},
collections::{hash_map::Entry, BTreeMap, HashMap},
ops::{Range, RangeBounds, RangeInclusive},
path::{Path, PathBuf},
};
use tokio::sync::watch;
@@ -36,26 +41,39 @@ pub struct SnapshotProvider {
/// Maintains a map which allows for concurrent access to different `NippyJars`, over different
/// segments and ranges.
map: DashMap<(BlockNumber, SnapshotSegment), LoadedJar>,
/// Available snapshot ranges on disk indexed by max blocks.
/// Available snapshot transaction ranges on disk indexed by max blocks.
snapshots_block_index: RwLock<SegmentRanges>,
/// Available snapshot ranges on disk indexed by max transactions.
/// Available snapshot block ranges on disk indexed by max transactions.
snapshots_tx_index: RwLock<SegmentRanges>,
/// Tracks the highest snapshot of every segment.
highest_tracker: Option<watch::Receiver<Option<HighestSnapshots>>>,
/// Directory where snapshots are located
path: PathBuf,
/// Whether [`SnapshotJarProvider`] loads filters into memory. If not, `by_hash` queries won't
/// be able to be queried directly.
load_filters: bool,
}
impl SnapshotProvider {
/// Creates a new [`SnapshotProvider`].
pub fn new(path: impl AsRef<Path>) -> Self {
Self {
pub fn new(path: impl AsRef<Path>) -> ProviderResult<Self> {
let provider = Self {
map: Default::default(),
snapshots_block_index: Default::default(),
snapshots_tx_index: Default::default(),
highest_tracker: None,
path: path.as_ref().to_path_buf(),
}
load_filters: false,
};
provider.update_index()?;
Ok(provider)
}
/// Loads filters into memory when creating a [`SnapshotJarProvider`].
pub fn with_filters(mut self) -> Self {
self.load_filters = true;
self
}
/// Adds a highest snapshot tracker to the provider
@@ -141,12 +159,15 @@ impl SnapshotProvider {
if let Some(jar) = self.map.get(&key) {
Ok(jar.into())
} else {
self.map.insert(
key,
LoadedJar::new(NippyJar::load(
&self.path.join(segment.filename(block_range, tx_range)),
)?)?,
);
let jar = NippyJar::load(&self.path.join(segment.filename(block_range, tx_range)))
.map(|jar| {
if self.load_filters {
return jar.load_filters()
}
Ok(jar)
})??;
self.map.insert(key, LoadedJar::new(jar)?);
Ok(self.map.get(&key).expect("qed").into())
}
}
@@ -166,6 +187,10 @@ impl SnapshotProvider {
let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable();
while let Some((block_end, tx_range)) = snapshots_rev_iter.next() {
if block > *block_end {
// request block is higher than highest snapshot block
return None
}
// `unwrap_or(0) is safe here as it sets block_start to 0 if the iterator is empty,
// indicating the lowest height snapshot has been reached.
let block_start =
@@ -192,6 +217,10 @@ impl SnapshotProvider {
let mut snapshots_rev_iter = segment_snapshots.iter().rev().peekable();
while let Some((tx_end, block_range)) = snapshots_rev_iter.next() {
if tx > *tx_end {
// request tx is higher than highest snapshot tx
return None
}
let tx_start = snapshots_rev_iter.peek().map(|(tx_end, _)| *tx_end + 1).unwrap_or(0);
if tx_start <= tx {
return Some((block_range.clone(), tx_start..=*tx_end))
@@ -200,11 +229,53 @@ impl SnapshotProvider {
None
}
/// Gets the highest snapshot if it exists for a snapshot segment.
pub fn get_highest_snapshot(&self, segment: SnapshotSegment) -> Option<BlockNumber> {
self.highest_tracker
.as_ref()
.and_then(|tracker| tracker.borrow().and_then(|highest| highest.highest(segment)))
/// Updates the inner transaction and block index
pub fn update_index(&self) -> ProviderResult<()> {
let mut block_index = self.snapshots_block_index.write();
let mut tx_index = self.snapshots_tx_index.write();
for (segment, ranges) in iter_snapshots(&self.path)? {
for (block_range, tx_range) in ranges {
let block_end = *block_range.end();
let tx_end = *tx_range.end();
match tx_index.entry(segment) {
Entry::Occupied(mut index) => {
index.get_mut().insert(tx_end, block_range);
}
Entry::Vacant(index) => {
index.insert(BTreeMap::from([(tx_end, block_range)]));
}
};
match block_index.entry(segment) {
Entry::Occupied(mut index) => {
index.get_mut().insert(block_end, tx_range);
}
Entry::Vacant(index) => {
index.insert(BTreeMap::from([(block_end, tx_range)]));
}
};
}
}
Ok(())
}
/// Gets the highest snapshot block if it exists for a snapshot segment.
pub fn get_highest_snapshot_block(&self, segment: SnapshotSegment) -> Option<BlockNumber> {
self.snapshots_block_index
.read()
.get(&segment)
.and_then(|index| index.last_key_value().map(|(last_block, _)| *last_block))
}
/// Gets the highest snapshotted transaction.
pub fn get_highest_snapshot_tx(&self, segment: SnapshotSegment) -> Option<TxNumber> {
self.snapshots_tx_index
.read()
.get(&segment)
.and_then(|index| index.last_key_value().map(|(last_tx, _)| *last_tx))
}
/// Iterates through segment snapshots in reverse order, executing a function until it returns
@@ -238,6 +309,56 @@ impl SnapshotProvider {
Ok(None)
}
/// Fetches data within a specified range across multiple snapshot files.
///
/// This function iteratively retrieves data using `get_fn` for each item in the given range.
/// It continues fetching until the end of the range is reached or the provided `predicate`
/// returns false.
pub fn fetch_range<T, F, P>(
&self,
segment: SnapshotSegment,
range: Range<u64>,
get_fn: F,
mut predicate: P,
) -> ProviderResult<Vec<T>>
where
F: Fn(&mut SnapshotCursor<'_>, u64) -> ProviderResult<Option<T>>,
P: FnMut(&T) -> bool,
{
let get_provider = |start: u64| match segment {
SnapshotSegment::Headers => self.get_segment_provider_from_block(segment, start, None),
SnapshotSegment::Transactions | SnapshotSegment::Receipts => {
self.get_segment_provider_from_transaction(segment, start, None)
}
};
let mut result = Vec::with_capacity((range.end - range.start).min(100) as usize);
let mut provider = get_provider(range.start)?;
let mut cursor = provider.cursor()?;
// advances number in range
'outer: for number in range {
// advances snapshot files if `get_fn` returns None
'inner: loop {
match get_fn(&mut cursor, number)? {
Some(res) => {
if !predicate(&res) {
break 'outer
}
result.push(res);
break 'inner;
}
None => {
provider = get_provider(number)?;
cursor = provider.cursor()?;
}
}
}
}
Ok(result)
}
}
impl HeaderProvider for SnapshotProvider {
@@ -274,8 +395,13 @@ impl HeaderProvider for SnapshotProvider {
.header_td_by_number(num)
}
fn headers_range(&self, _range: impl RangeBounds<BlockNumber>) -> ProviderResult<Vec<Header>> {
todo!();
fn headers_range(&self, range: impl RangeBounds<BlockNumber>) -> ProviderResult<Vec<Header>> {
self.fetch_range(
SnapshotSegment::Headers,
to_range(range),
|cursor, number| cursor.get_one::<HeaderMask<Header>>(number.into()),
|_| true,
)
}
fn sealed_header(&self, num: BlockNumber) -> ProviderResult<Option<SealedHeader>> {
@@ -285,10 +411,19 @@ impl HeaderProvider for SnapshotProvider {
fn sealed_headers_while(
&self,
_range: impl RangeBounds<BlockNumber>,
_predicate: impl FnMut(&SealedHeader) -> bool,
range: impl RangeBounds<BlockNumber>,
predicate: impl FnMut(&SealedHeader) -> bool,
) -> ProviderResult<Vec<SealedHeader>> {
todo!()
self.fetch_range(
SnapshotSegment::Headers,
to_range(range),
|cursor, number| {
Ok(cursor
.get_two::<HeaderMask<Header, BlockHash>>(number.into())?
.map(|(header, hash)| header.seal(hash)))
},
predicate,
)
}
}
@@ -299,28 +434,63 @@ impl BlockHashReader for SnapshotProvider {
fn canonical_hashes_range(
&self,
_start: BlockNumber,
_end: BlockNumber,
start: BlockNumber,
end: BlockNumber,
) -> ProviderResult<Vec<B256>> {
todo!()
self.fetch_range(
SnapshotSegment::Headers,
start..end,
|cursor, number| cursor.get_one::<HeaderMask<BlockHash>>(number.into()),
|_| true,
)
}
}
impl BlockNumReader for SnapshotProvider {
fn chain_info(&self) -> ProviderResult<ChainInfo> {
todo!()
impl ReceiptProvider for SnapshotProvider {
fn receipt(&self, num: TxNumber) -> ProviderResult<Option<Receipt>> {
self.get_segment_provider_from_transaction(SnapshotSegment::Receipts, num, None)?
.receipt(num)
}
fn best_block_number(&self) -> ProviderResult<BlockNumber> {
todo!()
fn receipt_by_hash(&self, hash: TxHash) -> ProviderResult<Option<Receipt>> {
if let Some(num) = self.transaction_id(hash)? {
return self.receipt(num)
}
Ok(None)
}
fn last_block_number(&self) -> ProviderResult<BlockNumber> {
todo!()
fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult<Option<Vec<Receipt>>> {
unreachable!()
}
fn block_number(&self, _hash: B256) -> ProviderResult<Option<BlockNumber>> {
todo!()
fn receipts_by_tx_range(
&self,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>> {
self.fetch_range(
SnapshotSegment::Receipts,
to_range(range),
|cursor, number| cursor.get_one::<ReceiptMask<Receipt>>(number.into()),
|_| true,
)
}
}
impl TransactionsProviderExt for SnapshotProvider {
fn transaction_hashes_by_range(
&self,
tx_range: Range<TxNumber>,
) -> ProviderResult<Vec<(TxHash, TxNumber)>> {
self.fetch_range(
SnapshotSegment::Transactions,
tx_range,
|cursor, number| {
let tx =
cursor.get_one::<TransactionMask<TransactionSignedNoHash>>(number.into())?;
Ok(tx.map(|tx| (tx.hash(), cursor.number())))
},
|_| true,
)
}
}
@@ -367,42 +537,150 @@ impl TransactionsProvider for SnapshotProvider {
&self,
_hash: TxHash,
) -> ProviderResult<Option<(TransactionSigned, TransactionMeta)>> {
todo!()
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn transaction_block(&self, _id: TxNumber) -> ProviderResult<Option<BlockNumber>> {
todo!()
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn transactions_by_block(
&self,
_block_id: BlockHashOrNumber,
) -> ProviderResult<Option<Vec<TransactionSigned>>> {
todo!()
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn transactions_by_block_range(
&self,
_range: impl RangeBounds<BlockNumber>,
) -> ProviderResult<Vec<Vec<TransactionSigned>>> {
todo!()
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn senders_by_tx_range(
&self,
_range: impl RangeBounds<TxNumber>,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Address>> {
todo!()
let txes = self.transactions_by_tx_range(range)?;
TransactionSignedNoHash::recover_signers(&txes, txes.len())
.ok_or(ProviderError::SenderRecoveryError)
}
fn transactions_by_tx_range(
&self,
_range: impl RangeBounds<TxNumber>,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<reth_primitives::TransactionSignedNoHash>> {
todo!()
self.fetch_range(
SnapshotSegment::Transactions,
to_range(range),
|cursor, number| {
cursor.get_one::<TransactionMask<TransactionSignedNoHash>>(number.into())
},
|_| true,
)
}
fn transaction_sender(&self, id: TxNumber) -> ProviderResult<Option<Address>> {
Ok(self.transaction_by_id_no_hash(id)?.and_then(|tx| tx.recover_signer()))
}
}
/* Cannot be successfully implemented but must exist for trait requirements */
impl BlockNumReader for SnapshotProvider {
fn chain_info(&self) -> ProviderResult<ChainInfo> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn best_block_number(&self) -> ProviderResult<BlockNumber> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn last_block_number(&self) -> ProviderResult<BlockNumber> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn block_number(&self, _hash: B256) -> ProviderResult<Option<BlockNumber>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
}
impl BlockReader for SnapshotProvider {
fn find_block_by_hash(
&self,
_hash: B256,
_source: BlockSource,
) -> ProviderResult<Option<Block>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn block(&self, _id: BlockHashOrNumber) -> ProviderResult<Option<Block>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn pending_block(&self) -> ProviderResult<Option<SealedBlock>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn pending_block_with_senders(&self) -> ProviderResult<Option<SealedBlockWithSenders>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn pending_block_and_receipts(&self) -> ProviderResult<Option<(SealedBlock, Vec<Receipt>)>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn ommers(&self, _id: BlockHashOrNumber) -> ProviderResult<Option<Vec<Header>>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn block_body_indices(&self, _num: u64) -> ProviderResult<Option<StoredBlockBodyIndices>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn block_with_senders(
&self,
_id: BlockHashOrNumber,
_transaction_kind: TransactionVariant,
) -> ProviderResult<Option<BlockWithSenders>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn block_range(&self, _range: RangeInclusive<BlockNumber>) -> ProviderResult<Vec<Block>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
}
impl WithdrawalsProvider for SnapshotProvider {
fn withdrawals_by_block(
&self,
_id: BlockHashOrNumber,
_timestamp: u64,
) -> ProviderResult<Option<Vec<Withdrawal>>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
fn latest_withdrawal(&self) -> ProviderResult<Option<Withdrawal>> {
// Required data not present in snapshots
Err(ProviderError::UnsupportedProvider)
}
}

View File

@@ -7,7 +7,7 @@ pub use jar::SnapshotJarProvider;
use reth_interfaces::provider::ProviderResult;
use reth_nippy_jar::NippyJar;
use reth_primitives::{snapshot::SegmentHeader, SnapshotSegment};
use std::ops::Deref;
use std::{ops::Deref, sync::Arc};
/// Alias type for each specific `NippyJar`.
type LoadedJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), LoadedJar>;
@@ -16,17 +16,17 @@ type LoadedJarRef<'a> = dashmap::mapref::one::Ref<'a, (u64, SnapshotSegment), Lo
#[derive(Debug)]
pub struct LoadedJar {
jar: NippyJar<SegmentHeader>,
mmap_handle: reth_nippy_jar::MmapHandle,
mmap_handle: Arc<reth_nippy_jar::DataReader>,
}
impl LoadedJar {
fn new(jar: NippyJar<SegmentHeader>) -> ProviderResult<Self> {
let mmap_handle = jar.open_data()?;
let mmap_handle = Arc::new(jar.open_data_reader()?);
Ok(Self { jar, mmap_handle })
}
/// Returns a clone of the mmap handle that can be used to instantiate a cursor.
fn mmap_handle(&self) -> reth_nippy_jar::MmapHandle {
fn mmap_handle(&self) -> Arc<reth_nippy_jar::DataReader> {
self.mmap_handle.clone()
}
}
@@ -131,7 +131,7 @@ mod test {
// Use providers to query Header data and compare if it matches
{
let db_provider = factory.provider().unwrap();
let manager = SnapshotProvider::new(snap_path.path());
let manager = SnapshotProvider::new(snap_path.path()).unwrap().with_filters();
let jar_provider = manager
.get_segment_provider_from_block(SnapshotSegment::Headers, 0, Some(&snap_file))
.unwrap();

View File

@@ -344,6 +344,13 @@ impl ReceiptProvider for MockEthProvider {
fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult<Option<Vec<Receipt>>> {
Ok(None)
}
fn receipts_by_tx_range(
&self,
_range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>> {
Ok(vec![])
}
}
impl ReceiptProviderIdExt for MockEthProvider {}

View File

@@ -222,6 +222,13 @@ impl ReceiptProvider for NoopProvider {
fn receipts_by_block(&self, _block: BlockHashOrNumber) -> ProviderResult<Option<Vec<Receipt>>> {
Ok(None)
}
fn receipts_by_tx_range(
&self,
_range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>> {
Ok(vec![])
}
}
impl ReceiptProviderIdExt for NoopProvider {}

View File

@@ -1,3 +1,5 @@
use std::ops::RangeBounds;
use reth_interfaces::provider::ProviderResult;
use reth_primitives::{BlockHashOrNumber, BlockId, BlockNumberOrTag, Receipt, TxHash, TxNumber};
@@ -20,6 +22,12 @@ pub trait ReceiptProvider: Send + Sync {
///
/// Returns `None` if the block is not found.
fn receipts_by_block(&self, block: BlockHashOrNumber) -> ProviderResult<Option<Vec<Receipt>>>;
/// Get receipts by tx range.
fn receipts_by_tx_range(
&self,
range: impl RangeBounds<TxNumber>,
) -> ProviderResult<Vec<Receipt>>;
}
/// Trait extension for `ReceiptProvider`, for types that implement `BlockId` conversion.