From 28e46bfd488478e8afae5cbdcc0dbbb7f0cbfb1c Mon Sep 17 00:00:00 2001
From: nk_ysg
Date: Fri, 30 Aug 2024 15:02:14 +0800
Subject: [PATCH] chore: remove phf from static files (#10259)

Co-authored-by: joshieDo <93316087+joshieDo@users.noreply.github.com>
Co-authored-by: Matthias Seitz
---
 Cargo.lock                                  |  44 ----
 crates/storage/db/src/static_file/cursor.rs |   4 +-
 crates/storage/nippy-jar/Cargo.toml         |   1 -
 crates/storage/nippy-jar/src/cursor.rs      |  67 +----
 crates/storage/nippy-jar/src/error.rs       |   4 -
 crates/storage/nippy-jar/src/lib.rs         | 255 +++-----------------
 crates/storage/nippy-jar/src/phf/fmph.rs    |  99 --------
 crates/storage/nippy-jar/src/phf/go_fmph.rs | 100 --------
 crates/storage/nippy-jar/src/phf/mod.rs     |  46 ----
 9 files changed, 40 insertions(+), 580 deletions(-)
 delete mode 100644 crates/storage/nippy-jar/src/phf/fmph.rs
 delete mode 100644 crates/storage/nippy-jar/src/phf/go_fmph.rs
 delete mode 100644 crates/storage/nippy-jar/src/phf/mod.rs

diff --git a/Cargo.lock b/Cargo.lock
index 171b6d22e5..329a5e3c08 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1233,12 +1233,6 @@ dependencies = [
  "syn 2.0.76",
 ]

-[[package]]
-name = "binout"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b60b1af88a588fca5fe424ae7d735bc52814f80ff57614f57043cc4e2024f2ea"
-
 [[package]]
 name = "bit-set"
 version = "0.5.3"
@@ -1270,15 +1264,6 @@ dependencies = [
  "serde",
 ]

-[[package]]
-name = "bitm"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b06e8e5bec3490b9f6f3adbb78aa4f53e8396fd9994e8a62a346b44ea7c15f35"
-dependencies = [
- "dyn_size_of",
-]
-
 [[package]]
 name = "bitvec"
 version = "1.0.1"
@@ -2542,12 +2527,6 @@ version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125"

-[[package]]
-name = "dyn_size_of"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33d4f78a40b1ec35bf8cafdaaf607ba2f773c366b0b3bda48937cacd7a8d5134"
-
 [[package]]
 name = "ecdsa"
 version = "0.16.9"
@@ -5293,19 +5272,6 @@ dependencies = [
  "ucd-trie",
 ]

-[[package]]
-name = "ph"
-version = "0.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b7b74d575d7c11fb653fae69688be5206cafc1ead33c01ce61ac7f36eae45b"
-dependencies = [
- "binout",
- "bitm",
- "dyn_size_of",
- "rayon",
- "wyhash",
-]
-
 [[package]]
 name = "pharos"
 version = "0.5.3"
@@ -7502,7 +7468,6 @@ dependencies = [
 "derive_more 1.0.0",
 "lz4_flex",
 "memmap2",
-"ph",
 "rand 0.8.5",
 "reth-fs-util",
 "serde",
@@ -11185,15 +11150,6 @@ dependencies = [
 "web-sys",
 ]

-[[package]]
-name = "wyhash"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295"
-dependencies = [
- "rand_core 0.6.4",
-]
-
 [[package]]
 name = "wyz"
 version = "0.5.1"
diff --git a/crates/storage/db/src/static_file/cursor.rs b/crates/storage/db/src/static_file/cursor.rs
index 4a052c6abf..f22006c462 100644
--- a/crates/storage/db/src/static_file/cursor.rs
+++ b/crates/storage/db/src/static_file/cursor.rs
@@ -1,7 +1,7 @@
 use super::mask::{ColumnSelectorOne, ColumnSelectorThree, ColumnSelectorTwo};
 use derive_more::{Deref, DerefMut};
 use reth_db_api::table::Decompress;
-use reth_nippy_jar::{DataReader, NippyJar, NippyJarCursor};
+use reth_nippy_jar::{DataReader, NippyJar, NippyJarCursor, NippyJarError};
 use reth_primitives::{static_file::SegmentHeader, B256};
 use reth_storage_errors::provider::{ProviderError, ProviderResult};
 use std::sync::Arc;
@@ -39,7 +39,7 @@ impl<'a> StaticFileCursor<'a> {
         }

         let row = match key_or_num {
-            KeyOrNumber::Key(k) => self.row_by_key_with_cols(k, mask),
+            KeyOrNumber::Key(_) => Err(NippyJarError::UnsupportedFilterQuery),
             KeyOrNumber::Number(n) => match self.jar().user_header().start() {
                 Some(offset) => {
                     if offset > n {
diff --git a/crates/storage/nippy-jar/Cargo.toml b/crates/storage/nippy-jar/Cargo.toml
index 0bc3e40dc2..ba5846bdc4 100644
--- a/crates/storage/nippy-jar/Cargo.toml
+++ b/crates/storage/nippy-jar/Cargo.toml
@@ -19,7 +19,6 @@ name = "reth_nippy_jar"
 reth-fs-util.workspace = true

 # filter
-ph = "0.8.0"
 cuckoofilter = { version = "0.5.0", features = [
     "serde_support",
     "serde_bytes",
diff --git a/crates/storage/nippy-jar/src/cursor.rs b/crates/storage/nippy-jar/src/cursor.rs
index d42b0d364b..7af55fd436 100644
--- a/crates/storage/nippy-jar/src/cursor.rs
+++ b/crates/storage/nippy-jar/src/cursor.rs
@@ -1,10 +1,8 @@
 use crate::{
     compression::{Compression, Compressors, Zstd},
-    DataReader, InclusionFilter, NippyJar, NippyJarError, NippyJarHeader, PerfectHashingFunction,
-    RefRow,
+    DataReader, NippyJar, NippyJarError, NippyJarHeader, RefRow,
 };
 use std::{ops::Range, sync::Arc};
-use sucds::int_vectors::Access;
 use zstd::bulk::Decompressor;

 /// Simple cursor implementation to retrieve data from [`NippyJar`].
@@ -67,35 +65,6 @@ impl<'a, H: NippyJarHeader> NippyJarCursor<'a, H> {
         self.row = 0;
     }

-    /// Returns a row, searching it by a key.
-    ///
-    /// **May return false positives.**
-    ///
-    /// Example usage would be querying a transactions file with a transaction hash which is **NOT**
-    /// stored in file.
-    pub fn row_by_key(&mut self, key: &[u8]) -> Result<Option<RefRow<'_>>, NippyJarError> {
-        if let (Some(filter), Some(phf)) = (&self.jar.filter, &self.jar.phf) {
-            // TODO: is it worth to parallelize both?
-
-            // May have false positives
-            if filter.contains(key)? {
-                // May have false positives
-                if let Some(row_index) = phf.get_index(key)? {
-                    self.row = self
-                        .jar
-                        .offsets_index
-                        .access(row_index as usize)
-                        .expect("built from same set") as u64;
-                    return self.next_row()
-                }
-            }
-        } else {
-            return Err(NippyJarError::UnsupportedFilterQuery)
-        }
-
-        Ok(None)
-    }
-
     /// Returns a row by its number.
     pub fn row_by_number(&mut self, row: usize) -> Result<Option<RefRow<'_>>, NippyJarError> {
         self.row = row as u64;
@@ -130,40 +99,6 @@ impl<'a, H: NippyJarHeader> NippyJarCursor<'a, H> {
         ))
     }

-    /// Returns a row, searching it by a key using a
-    /// `mask` to only read certain columns from the row.
-    ///
-    /// **May return false positives.**
-    ///
-    /// Example usage would be querying a transactions file with a transaction hash which is **NOT**
-    /// stored in file.
-    pub fn row_by_key_with_cols(
-        &mut self,
-        key: &[u8],
-        mask: usize,
-    ) -> Result<Option<RefRow<'_>>, NippyJarError> {
-        if let (Some(filter), Some(phf)) = (&self.jar.filter, &self.jar.phf) {
-            // TODO: is it worth to parallelize both?
-
-            // May have false positives
-            if filter.contains(key)? {
-                // May have false positives
-                if let Some(row_index) = phf.get_index(key)? {
-                    self.row = self
-                        .jar
-                        .offsets_index
-                        .access(row_index as usize)
-                        .expect("built from same set") as u64;
-                    return self.next_row_with_cols(mask)
-                }
-            }
-        } else {
-            return Err(NippyJarError::UnsupportedFilterQuery)
-        }
-
-        Ok(None)
-    }
-
     /// Returns a row by its number by using a `mask` to only read certain columns from the row.
     pub fn row_by_number_with_cols(
         &mut self,
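Key-based cursor queries are gone entirely after this hunk: `NippyJarCursor` is now strictly positional, and callers resolve a key to a row number before touching the cursor (as the `static_file` change above does via `SegmentHeader::start`). A minimal sketch of the surviving access pattern, assuming a two-column jar already frozen at `path` and a `NippyJarCursor::new` constructor taking the loaded jar (the test setup elided by this diff does the same):

    use reth_nippy_jar::{NippyJar, NippyJarCursor, NippyJarError};

    fn read_row(path: &std::path::Path, row: usize) -> Result<(), NippyJarError> {
        // Load the jar configuration from disk and open a cursor over the data.
        let jar: NippyJar<()> = NippyJar::load_without_header(path)?;
        let mut cursor = NippyJarCursor::new(&jar)?;

        // Access is by row number only; `None` means the row is out of range.
        if let Some(columns) = cursor.row_by_number(row)? {
            // One `&[u8]` slice per column of the requested row.
            assert_eq!(columns.len(), 2);
        }
        Ok(())
    }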
diff --git a/crates/storage/nippy-jar/src/error.rs b/crates/storage/nippy-jar/src/error.rs
index 225d4fba30..6a5714e1e4 100644
--- a/crates/storage/nippy-jar/src/error.rs
+++ b/crates/storage/nippy-jar/src/error.rs
@@ -31,10 +31,6 @@ pub enum NippyJarError {
     FilterMaxCapacity,
     #[error("cuckoo was not properly initialized after loaded")]
     FilterCuckooNotLoaded,
-    #[error("perfect hashing function doesn't have any keys added")]
-    PHFMissingKeys,
-    #[error("nippy jar initialized without perfect hashing function")]
-    PHFMissing,
     #[error("nippy jar was built without an index")]
     UnsupportedFilterQuery,
     #[error("the size of an offset must be at most 8 bytes, got {offset_size}")]
diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs
index 056f456eb2..60ed573461 100644
--- a/crates/storage/nippy-jar/src/lib.rs
+++ b/crates/storage/nippy-jar/src/lib.rs
@@ -32,9 +32,10 @@ pub mod compression;
 use compression::Compression;
 use compression::Compressors;

-pub mod phf;
-pub use phf::PHFKey;
-use phf::{Fmph, Functions, GoFmph, PerfectHashingFunction};
+/// empty enum for backwards compatibility
+#[derive(Debug, Serialize, Deserialize)]
+#[cfg_attr(test, derive(PartialEq, Eq))]
+pub enum Functions {}

 mod error;
 pub use error::NippyJarError;
@@ -74,24 +75,6 @@ impl<T> NippyJarHeader for T where
 ///
 /// Data is organized into a columnar format, enabling column-based compression. Data retrieval
 /// entails consulting an offset list and fetching the data from file via `mmap`.
-///
-/// PHF & Filters:
-/// For data membership verification, the `filter` field can be configured with algorithms like
-/// Bloom or Cuckoo filters. While these filters enable rapid membership checks, it's important to
-/// note that **they may yield false positives but not false negatives**. Therefore, they serve as
-/// preliminary checks (eg. in `by_hash` queries) and should be followed by data verification on
-/// retrieval.
-///
-/// The `phf` (Perfect Hashing Function) and `offsets_index` fields facilitate the data retrieval
-/// process in for example `by_hash` queries. Specifically, the PHF converts a query, such as a
-/// block hash, into a unique integer. This integer is then used as an index in `offsets_index`,
-/// which maps to the actual data location in the `offsets` list. Similar to the `filter`, the PHF
-/// may also produce false positives but not false negatives, necessitating subsequent data
-/// verification.
-///
-/// Note: that the key (eg. `BlockHash`) passed to a filter and phf does not need to actually be
-/// stored.
-///
 /// Ultimately, the `freeze` function yields two files: a data file containing both the data and its
 /// configuration, and an index file that houses the offsets and `offsets_index`.
 #[derive(Serialize, Deserialize)]
@@ -112,7 +95,7 @@ pub struct NippyJar<H = ()> {
     /// Optional filter function for data membership checks.
     filter: Option<InclusionFilters>,
     #[serde(skip)]
-    /// Optional Perfect Hashing Function (PHF) for unique offset mapping.
+    /// Optional field for backwards compatibility
     phf: Option<Functions>,
     /// Index mapping PHF output to value offsets in `offsets`.
     #[serde(skip)]
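The replacement `Functions` is an uninhabited enum: it keeps the `phf` field's type alive, so existing code and derives keep compiling, while guaranteeing at the type level that the field can only ever hold `None`. A self-contained model of the trick, with hypothetical names and bincode 1.x assumed (not reth code):

    use serde::{Deserialize, Serialize};

    // No variants, so no value of this type can ever be constructed.
    #[derive(Debug, Serialize, Deserialize)]
    enum Legacy {}

    #[derive(Debug, Serialize, Deserialize)]
    struct Config {
        columns: u64,
        #[serde(skip)]
        legacy: Option<Legacy>, // mirrors `phf: Option<Functions>`: always `None`
    }

    fn main() {
        let cfg = Config { columns: 23, legacy: None };
        let bytes = bincode::serialize(&cfg).unwrap();
        let back: Config = bincode::deserialize(&bytes).unwrap();
        assert!(back.legacy.is_none()); // the only state the field can hold
    }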
@@ -196,18 +179,6 @@ impl<H: NippyJarHeader> NippyJar<H> {
         self
     }

-    /// Adds [`phf::Fmph`] perfect hashing function.
-    pub fn with_fmph(mut self) -> Self {
-        self.phf = Some(Functions::Fmph(Fmph::new()));
-        self
-    }
-
-    /// Adds [`phf::GoFmph`] perfect hashing function.
-    pub fn with_gofmph(mut self) -> Self {
-        self.phf = Some(Functions::GoFmph(GoFmph::new()));
-        self
-    }
-
     /// Gets a reference to the user header.
     pub const fn user_header(&self) -> &H {
         &self.user_header
@@ -346,16 +317,6 @@ impl<H: NippyJarHeader> InclusionFilter for NippyJar<H> {
     }
 }

-impl<H: NippyJarHeader> PerfectHashingFunction for NippyJar<H> {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        self.phf.as_mut().ok_or(NippyJarError::PHFMissing)?.set_keys(keys)
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        self.phf.as_ref().ok_or(NippyJarError::PHFMissing)?.get_index(key)
-    }
-}
-
 #[cfg(test)]
 impl<H: NippyJarHeader> NippyJar<H> {
     /// If required, prepares any compression algorithm to an early pass of the data.
@@ -371,55 +332,6 @@ impl<H: NippyJarHeader> NippyJar<H> {
         Ok(())
     }

-    /// Prepares beforehand the offsets index for querying rows based on `values` (eg. transaction
-    /// hash). Expects `values` to be sorted in the same way as the data that is going to be
-    /// later on inserted.
-    ///
-    /// Currently collecting all items before acting on them.
-    pub fn prepare_index<T: PHFKey>(
-        &mut self,
-        values: impl IntoIterator<Item = ColumnResult<T>>,
-        row_count: usize,
-    ) -> Result<(), NippyJarError> {
-        debug!(target: "nippy-jar", ?row_count, "Preparing index.");
-
-        let values = values.into_iter().collect::<Result<Vec<_>, _>>()?;
-
-        debug_assert!(
-            row_count == values.len(),
-            "Row count ({row_count}) differs from value list count ({}).",
-            values.len()
-        );
-
-        let mut offsets_index = vec![0; row_count];
-
-        // Builds perfect hashing function from the values
-        if let Some(phf) = self.phf.as_mut() {
-            debug!(target: "nippy-jar", ?row_count, values_count = ?values.len(), "Setting keys for perfect hashing function.");
-            phf.set_keys(&values)?;
-        }
-
-        if self.filter.is_some() || self.phf.is_some() {
-            debug!(target: "nippy-jar", ?row_count, "Creating filter and offsets_index.");
-
-            for (row_num, v) in values.into_iter().enumerate() {
-                if let Some(filter) = self.filter.as_mut() {
-                    filter.add(v.as_ref())?;
-                }
-
-                if let Some(phf) = self.phf.as_mut() {
-                    // Points to the first column value offset of the row.
-                    let index = phf.get_index(v.as_ref())?.expect("initialized") as usize;
-                    let _ = std::mem::replace(&mut offsets_index[index], row_num as u64);
-                }
-            }
-        }
-
-        debug!(target: "nippy-jar", ?row_count, "Encoding offsets index list.");
-        self.offsets_index = PrefixSummedEliasFano::from_slice(&offsets_index)?;
-        Ok(())
-    }
-
     /// Writes all data and configuration to a file and the offset index to another.
     pub fn freeze(
         self,
@@ -447,7 +359,7 @@ impl<H: NippyJarHeader> NippyJar<H> {
         Ok(writer.into_jar())
     }

-    /// Freezes [`PerfectHashingFunction`], [`InclusionFilter`] and the offset index to file.
+    /// Freezes [`InclusionFilter`] and the offset index to file.
     fn freeze_filters(&self) -> Result<(), NippyJarError> {
         debug!(target: "nippy-jar", path=?self.index_path(), "Writing offsets and offsets index to file.");

@@ -474,11 +386,6 @@ impl<H: NippyJarHeader> NippyJar<H> {
             }
         }

-        // Check `prepare_index` was called.
-        if let Some(phf) = &self.phf {
-            let _ = phf.get_index(&[])?;
-        }
-
         Ok(())
     }
 }
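For context on what the removed `prepare_index` built: the PHF assigned each key a unique slot, and `offsets_index` inverted that slot back to the row number, so a later `row_by_key` was two array hops plus a verification read. A condensed, runnable model of that scheme against the `ph` crate this patch drops (toy four-byte keys; `ph = "0.8"` assumed as a dependency):

    use ph::fmph::{BuildConf, Function};

    fn main() {
        // Keys stand in for e.g. transaction hashes; row order is insertion order.
        let keys: Vec<[u8; 4]> = vec![*b"tx_a", *b"tx_b", *b"tx_c"];
        let phf = Function::from_slice_with_conf(&keys, BuildConf::default());

        // prepare_index in miniature: offsets_index[phf(key)] = row number.
        let mut offsets_index = vec![0u64; keys.len()];
        for (row, key) in keys.iter().enumerate() {
            offsets_index[phf.get(key).expect("key was inserted") as usize] = row as u64;
        }

        // row_by_key reduced to its core: key -> PHF slot -> row number.
        let row = offsets_index[phf.get(b"tx_b").expect("key was inserted") as usize];
        assert_eq!(row, 1);

        // Keys that were never inserted may still map to some slot (false
        // positives), which is why the cuckoo filter and a post-read key
        // comparison backed this lookup in the deleted code.
    }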
@@ -588,7 +495,7 @@ mod tests {
     use super::*;
     use compression::Compression;
     use rand::{rngs::SmallRng, seq::SliceRandom, RngCore, SeedableRng};
-    use std::{collections::HashSet, fs::OpenOptions};
+    use std::{fs::OpenOptions, io::Read};

     type ColumnResults<T> = Vec<ColumnResult<T>>;
     type ColumnValues = Vec<Vec<u8>>;
@@ -617,57 +524,30 @@ mod tests {
     }

     #[test]
-    fn test_phf() {
-        let (col1, col2) = test_data(None);
-        let num_columns = 2;
-        let num_rows = col1.len() as u64;
-        let file_path = tempfile::NamedTempFile::new().unwrap();
+    fn test_config_serialization() {
+        let file = tempfile::NamedTempFile::new().unwrap();
+        let jar = NippyJar::new_without_header(23, file.path()).with_lz4();
+        jar.freeze_config().unwrap();

-        let create_nippy = || -> NippyJar<()> {
-            let mut nippy = NippyJar::new_without_header(num_columns, file_path.path());
-            assert!(matches!(
-                NippyJar::set_keys(&mut nippy, &col1),
-                Err(NippyJarError::PHFMissing)
-            ));
-            nippy
-        };
+        let mut config_file = OpenOptions::new().read(true).open(jar.config_path()).unwrap();
+        let config_file_len = config_file.metadata().unwrap().len();
+        assert_eq!(config_file_len, 37);

-        let check_phf = |mut nippy: NippyJar<_>| {
-            assert!(matches!(
-                NippyJar::get_index(&nippy, &col1[0]),
-                Err(NippyJarError::PHFMissingKeys)
-            ));
-            assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok());
+        let mut buf = Vec::with_capacity(config_file_len as usize);
+        config_file.read_to_end(&mut buf).unwrap();

-            let collect_indexes = |nippy: &NippyJar<_>| -> Vec<u64> {
-                col1.iter()
-                    .map(|value| NippyJar::get_index(nippy, value.as_slice()).unwrap().unwrap())
-                    .collect()
-            };
+        assert_eq!(
+            vec![
+                1, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            ],
+            buf
+        );

-            // Ensure all indexes are unique
-            let indexes = collect_indexes(&nippy);
-            assert_eq!(indexes.iter().collect::<HashSet<_>>().len(), indexes.len());
-
-            // Ensure reproducibility
-            assert!(NippyJar::set_keys(&mut nippy, &col1).is_ok());
-            assert_eq!(indexes, collect_indexes(&nippy));
-
-            // Ensure that loaded phf provides the same function outputs
-            nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
-            nippy
-                .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
-                .unwrap();
-            let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
-            loaded_nippy.load_filters().unwrap();
-            assert_eq!(indexes, collect_indexes(&loaded_nippy));
-        };
-
-        // fmph bytes size for 100 values of 32 bytes: 54
-        check_phf(create_nippy().with_fmph());
-
-        // fmph bytes size for 100 values of 32 bytes: 46
-        check_phf(create_nippy().with_gofmph());
+        let mut read_jar = bincode::deserialize_from::<_, NippyJar>(&buf[..]).unwrap();
+        // Path is not ser/de
+        read_jar.path = file.path().to_path_buf();
+        assert_eq!(jar, read_jar);
     }

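The 37-byte fixture in `test_config_serialization` is consistent with bincode's default fixed-width, little-endian encoding of the non-skipped fields. A hypothetical breakdown (the trailing-field attribution is an assumption for illustration, not taken from this diff):

    fn main() {
        let version = 1u64.to_le_bytes();    // version: usize = 1
        // user_header: () serializes to zero bytes
        let columns = 23u64.to_le_bytes();   // columns: usize = 23
        let rows = 0u64.to_le_bytes();       // rows: usize = 0
        let compressor = [1u8, 1, 0, 0, 0];  // Option tag Some + variant index 1 (lz4)
        let trailing = 0u64.to_le_bytes();   // one more usize field, e.g. a max row size

        let total =
            version.len() + columns.len() + rows.len() + compressor.len() + trailing.len();
        assert_eq!(total, 37); // matches the asserted config file length
    }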
     #[test]
@@ -891,11 +771,9 @@
         let mut nippy = NippyJar::new(num_columns, file_path.path(), BlockJarHeader { block_start })
             .with_zstd(true, 5000)
-            .with_cuckoo_filter(col1.len())
-            .with_fmph();
+            .with_cuckoo_filter(col1.len());

         nippy.prepare_compression(data.clone()).unwrap();
-        nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
         nippy
             .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
             .unwrap();

         let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
         loaded_nippy.load_filters().unwrap();

         assert!(loaded_nippy.compressor().is_some());
         assert!(loaded_nippy.filter.is_some());
-        assert!(loaded_nippy.phf.is_some());
         assert_eq!(loaded_nippy.user_header().block_start, block_start);

         if let Some(Compressors::Zstd(_zstd)) = loaded_nippy.compressor() {
@@ -929,22 +806,9 @@
             data.shuffle(&mut rand::thread_rng());

             for (row_num, (v0, v1)) in data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                {
-                    let row_by_value = cursor
-                        .row_by_key(v0)
-                        .unwrap()
-                        .unwrap()
-                        .iter()
-                        .map(|a| a.to_vec())
-                        .collect::<Vec<_>>();
-                    assert_eq!((&row_by_value[0], &row_by_value[1]), (v0, v1));
-
-                    // Simulates `by_number` queries
-                    let row_by_num = cursor.row_by_number(row_num).unwrap().unwrap();
-                    assert_eq!(row_by_value, row_by_num);
-                }
+                // Simulates `by_number` queries
+                let row_by_num = cursor.row_by_number(row_num).unwrap().unwrap();
+                assert_eq!((&row_by_num[0].to_vec(), &row_by_num[1].to_vec()), (v0, v1));
             }
         }
     }
@@ -962,11 +826,9 @@
         {
             let mut nippy = NippyJar::new_without_header(num_columns, file_path.path())
                 .with_zstd(true, 5000)
-                .with_cuckoo_filter(col1.len())
-                .with_fmph();
+                .with_cuckoo_filter(col1.len());
             nippy.prepare_compression(data).unwrap();
-            nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
             nippy
                 .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
                 .unwrap();
         }
@@ -989,84 +851,41 @@
             // Read both columns
             for (row_num, (v0, v1)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                let row_by_value = cursor
-                    .row_by_key_with_cols(v0, BLOCKS_FULL_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .iter()
-                    .map(|a| a.to_vec())
-                    .collect::<Vec<_>>();
-                assert_eq!((&row_by_value[0], &row_by_value[1]), (*v0, *v1));
-
                 // Simulates `by_number` queries
                 let row_by_num = cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_FULL_MASK)
                     .unwrap()
                     .unwrap();
-                assert_eq!(row_by_value, row_by_num);
+                assert_eq!((&row_by_num[0].to_vec(), &row_by_num[1].to_vec()), (*v0, *v1));
             }

             // Read first column only: `Block`
             const BLOCKS_BLOCK_MASK: usize = 0b01;
             for (row_num, (v0, _)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                let row_by_value = cursor
-                    .row_by_key_with_cols(v0, BLOCKS_BLOCK_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .iter()
-                    .map(|a| a.to_vec())
-                    .collect::<Vec<_>>();
-                assert_eq!(row_by_value.len(), 1);
-                assert_eq!(&row_by_value[0], *v0);
-
                 // Simulates `by_number` queries
                 let row_by_num = cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_BLOCK_MASK)
                     .unwrap()
                     .unwrap();
                 assert_eq!(row_by_num.len(), 1);
-                assert_eq!(row_by_value, row_by_num);
+                assert_eq!(&row_by_num[0].to_vec(), *v0);
             }

             // Read second column only: `Block`
             const BLOCKS_WITHDRAWAL_MASK: usize = 0b10;
-            for (row_num, (v0, v1)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                let row_by_value = cursor
-                    .row_by_key_with_cols(v0, BLOCKS_WITHDRAWAL_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .iter()
-                    .map(|a| a.to_vec())
-                    .collect::<Vec<_>>();
-                assert_eq!(row_by_value.len(), 1);
-                assert_eq!(&row_by_value[0], *v1);
-
+            for (row_num, (_, v1)) in &data {
                 // Simulates `by_number` queries
                 let row_by_num = cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_WITHDRAWAL_MASK)
                     .unwrap()
                     .unwrap();
                 assert_eq!(row_by_num.len(), 1);
-                assert_eq!(row_by_value, row_by_num);
+                assert_eq!(&row_by_num[0].to_vec(), *v1);
             }

             // Read nothing
             const BLOCKS_EMPTY_MASK: usize = 0b00;
-            for (row_num, (v0, _)) in &data {
-                // Simulates `by_hash` queries by iterating col1 values, which were used to
-                // create the inner index.
-                assert!(cursor
-                    .row_by_key_with_cols(v0, BLOCKS_EMPTY_MASK)
-                    .unwrap()
-                    .unwrap()
-                    .is_empty());
-
+            for (row_num, _) in &data {
                 // Simulates `by_number` queries
                 assert!(cursor
                     .row_by_number_with_cols(*row_num, BLOCKS_EMPTY_MASK)
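The masks used throughout these tests are plain bitmaps over column positions: bit i selects column i, so `row_by_number_with_cols` returns only the flagged columns. An illustrative helper (not part of the crate) showing the arithmetic:

    const BLOCKS_FULL_MASK: usize = 0b11;       // both columns
    const BLOCKS_BLOCK_MASK: usize = 0b01;      // first column only
    const BLOCKS_WITHDRAWAL_MASK: usize = 0b10; // second column only

    fn selected_columns(mask: usize, num_columns: usize) -> Vec<usize> {
        (0..num_columns).filter(|col| mask & (1 << col) != 0).collect()
    }

    fn main() {
        assert_eq!(selected_columns(BLOCKS_FULL_MASK, 2), vec![0, 1]);
        assert_eq!(selected_columns(BLOCKS_BLOCK_MASK, 2), vec![0]);
        assert_eq!(selected_columns(BLOCKS_WITHDRAWAL_MASK, 2), vec![1]);
        assert!(selected_columns(0b00, 2).is_empty()); // the "read nothing" case
    }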
diff --git a/crates/storage/nippy-jar/src/phf/fmph.rs b/crates/storage/nippy-jar/src/phf/fmph.rs
deleted file mode 100644
index a332c40cf7..0000000000
--- a/crates/storage/nippy-jar/src/phf/fmph.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-use crate::{NippyJarError, PHFKey, PerfectHashingFunction};
-use ph::fmph::{BuildConf, Function};
-use serde::{
-    de::Error as DeSerdeError, ser::Error as SerdeError, Deserialize, Deserializer, Serialize,
-    Serializer,
-};
-
-/// Wrapper struct for [`Function`]. Implementation of the following [paper](https://dl.acm.org/doi/10.1145/3596453).
-#[derive(Default)]
-pub struct Fmph {
-    function: Option<Function>,
-}
-
-impl Fmph {
-    pub const fn new() -> Self {
-        Self { function: None }
-    }
-}
-
-impl PerfectHashingFunction for Fmph {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        self.function = Some(Function::from_slice_with_conf(
-            keys,
-            BuildConf { use_multiple_threads: true, ..Default::default() },
-        ));
-        Ok(())
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        if let Some(f) = &self.function {
-            return Ok(f.get(key))
-        }
-        Err(NippyJarError::PHFMissingKeys)
-    }
-}
-
-#[cfg(test)]
-impl PartialEq for Fmph {
-    fn eq(&self, _other: &Self) -> bool {
-        match (&self.function, &_other.function) {
-            (Some(func1), Some(func2)) => {
-                func1.level_sizes() == func2.level_sizes() &&
-                    func1.write_bytes() == func2.write_bytes() &&
-                    {
-                        let mut f1 = Vec::with_capacity(func1.write_bytes());
-                        func1.write(&mut f1).expect("enough capacity");
-
-                        let mut f2 = Vec::with_capacity(func2.write_bytes());
-                        func2.write(&mut f2).expect("enough capacity");
-
-                        f1 == f2
-                    }
-            }
-            (None, None) => true,
-            _ => false,
-        }
-    }
-}
-
-impl std::fmt::Debug for Fmph {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("Fmph")
-            .field("bytes_size", &self.function.as_ref().map(|f| f.write_bytes()))
-            .finish_non_exhaustive()
-    }
-}
-
-impl Serialize for Fmph {
-    /// Potentially expensive, but should be used only when creating the file.
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        match &self.function {
-            Some(f) => {
-                let mut v = Vec::with_capacity(f.write_bytes());
-                f.write(&mut v).map_err(S::Error::custom)?;
-                serializer.serialize_some(&v)
-            }
-            None => serializer.serialize_none(),
-        }
-    }
-}
-
-impl<'de> Deserialize<'de> for Fmph {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        if let Some(buffer) = <Option<Vec<u8>>>::deserialize(deserializer)? {
-            return Ok(Self {
-                function: Some(
-                    Function::read(&mut std::io::Cursor::new(buffer)).map_err(D::Error::custom)?,
-                ),
-            })
-        }
-        Ok(Self { function: None })
-    }
-}
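The deleted Serialize/Deserialize impls bridge a stream-only structure into serde by buffering it as an optional byte vector. The same pattern reduced to a standalone sketch, with a toy `Opaque` type standing in for `ph::fmph::Function` and bincode assumed for the round trip:

    use serde::{de::Error as _, Deserialize, Deserializer, Serialize, Serializer};

    // Stand-in for a type that only exposes streaming write/read.
    struct Opaque(Vec<u8>);

    impl Opaque {
        fn write(&self, out: &mut Vec<u8>) -> std::io::Result<()> {
            out.extend_from_slice(&self.0);
            Ok(())
        }
        fn read(bytes: &[u8]) -> std::io::Result<Self> {
            Ok(Self(bytes.to_vec()))
        }
    }

    struct Wrapper {
        function: Option<Opaque>,
    }

    impl Serialize for Wrapper {
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            match &self.function {
                Some(f) => {
                    // Buffer first, then hand serde an Option<Vec<u8>>.
                    let mut v = Vec::new();
                    f.write(&mut v).map_err(serde::ser::Error::custom)?;
                    serializer.serialize_some(&v)
                }
                None => serializer.serialize_none(),
            }
        }
    }

    impl<'de> Deserialize<'de> for Wrapper {
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let function = match <Option<Vec<u8>>>::deserialize(deserializer)? {
                Some(buffer) => Some(Opaque::read(&buffer).map_err(D::Error::custom)?),
                None => None,
            };
            Ok(Self { function })
        }
    }

    fn main() {
        let bytes =
            bincode::serialize(&Wrapper { function: Some(Opaque(vec![1, 2, 3])) }).unwrap();
        let back: Wrapper = bincode::deserialize(&bytes).unwrap();
        assert_eq!(back.function.map(|f| f.0), Some(vec![1, 2, 3]));
    }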
diff --git a/crates/storage/nippy-jar/src/phf/go_fmph.rs b/crates/storage/nippy-jar/src/phf/go_fmph.rs
deleted file mode 100644
index 328ddcb4dd..0000000000
--- a/crates/storage/nippy-jar/src/phf/go_fmph.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-use crate::{NippyJarError, PHFKey, PerfectHashingFunction};
-use ph::fmph::{GOBuildConf, GOFunction};
-use serde::{
-    de::Error as DeSerdeError, ser::Error as SerdeError, Deserialize, Deserializer, Serialize,
-    Serializer,
-};
-
-/// Wrapper struct for [`GOFunction`]. Implementation of the following [paper](https://dl.acm.org/doi/10.1145/3596453).
-#[derive(Default)]
-pub struct GoFmph {
-    function: Option<GOFunction>,
-}
-
-impl GoFmph {
-    pub const fn new() -> Self {
-        Self { function: None }
-    }
-}
-
-impl PerfectHashingFunction for GoFmph {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        self.function = Some(GOFunction::from_slice_with_conf(
-            keys,
-            GOBuildConf { use_multiple_threads: true, ..Default::default() },
-        ));
-        Ok(())
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        if let Some(f) = &self.function {
-            return Ok(f.get(key))
-        }
-        Err(NippyJarError::PHFMissingKeys)
-    }
-}
-
-#[cfg(test)]
-impl PartialEq for GoFmph {
-    fn eq(&self, other: &Self) -> bool {
-        match (&self.function, &other.function) {
-            (Some(func1), Some(func2)) => {
-                func1.level_sizes() == func2.level_sizes() &&
-                    func1.write_bytes() == func2.write_bytes() &&
-                    {
-                        let mut f1 = Vec::with_capacity(func1.write_bytes());
-                        func1.write(&mut f1).expect("enough capacity");
-
-                        let mut f2 = Vec::with_capacity(func2.write_bytes());
-                        func2.write(&mut f2).expect("enough capacity");
-
-                        f1 == f2
-                    }
-            }
-            (None, None) => true,
-            _ => false,
-        }
-    }
-}
-
-impl std::fmt::Debug for GoFmph {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("GoFmph")
-            .field("bytes_size", &self.function.as_ref().map(|f| f.write_bytes()))
-            .finish_non_exhaustive()
-    }
-}
-
-impl Serialize for GoFmph {
-    /// Potentially expensive, but should be used only when creating the file.
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        match &self.function {
-            Some(f) => {
-                let mut v = Vec::with_capacity(f.write_bytes());
-                f.write(&mut v).map_err(S::Error::custom)?;
-                serializer.serialize_some(&v)
-            }
-            None => serializer.serialize_none(),
-        }
-    }
-}
-
-impl<'de> Deserialize<'de> for GoFmph {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        if let Some(buffer) = <Option<Vec<u8>>>::deserialize(deserializer)? {
-            return Ok(Self {
-                function: Some(
-                    GOFunction::read(&mut std::io::Cursor::new(buffer))
-                        .map_err(D::Error::custom)?,
-                ),
-            })
-        }
-        Ok(Self { function: None })
-    }
-}
diff --git a/crates/storage/nippy-jar/src/phf/mod.rs b/crates/storage/nippy-jar/src/phf/mod.rs
deleted file mode 100644
index ade48b60a3..0000000000
--- a/crates/storage/nippy-jar/src/phf/mod.rs
+++ /dev/null
@@ -1,46 +0,0 @@
-use crate::NippyJarError;
-use serde::{Deserialize, Serialize};
-use std::hash::Hash;
-
-mod fmph;
-pub use fmph::Fmph;
-
-mod go_fmph;
-pub use go_fmph::GoFmph;
-
-/// Trait alias for [`PerfectHashingFunction`] keys.
-pub trait PHFKey: AsRef<[u8]> + Sync + Clone + Hash {}
-impl<T: AsRef<[u8]> + Sync + Clone + Hash> PHFKey for T {}
-
-/// Trait to build and query a perfect hashing function.
-pub trait PerfectHashingFunction: Serialize + for<'a> Deserialize<'a> {
-    /// Adds the key set and builds the perfect hashing function.
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError>;
-
-    /// Get corresponding associated integer. There might be false positives.
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError>;
-}
-
-/// Enumerates all types of perfect hashing functions.
-#[derive(Debug, Serialize, Deserialize)]
-#[cfg_attr(test, derive(PartialEq))]
-pub enum Functions {
-    Fmph(Fmph),
-    GoFmph(GoFmph),
-}
-
-impl PerfectHashingFunction for Functions {
-    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
-        match self {
-            Self::Fmph(f) => f.set_keys(keys),
-            Self::GoFmph(f) => f.set_keys(keys),
-        }
-    }
-
-    fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError> {
-        match self {
-            Self::Fmph(f) => f.get_index(key),
-            Self::GoFmph(f) => f.get_index(key),
-        }
-    }
-}
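What disappears with `phf/mod.rs` is a static-dispatch wrapper: one serializable enum fanning each trait method out to its variants instead of a `dyn` trait object. The shape of the pattern in miniature (toy backends, hypothetical names):

    trait Phf {
        fn get_index(&self, key: &[u8]) -> Option<u64>;
    }

    struct BackendA;
    struct BackendB;

    impl Phf for BackendA {
        fn get_index(&self, _key: &[u8]) -> Option<u64> { Some(0) }
    }
    impl Phf for BackendB {
        fn get_index(&self, _key: &[u8]) -> Option<u64> { Some(1) }
    }

    // Enum dispatch keeps calls statically resolved and the wrapper easy to
    // serialize, at the cost of one match arm per backend and per method.
    enum Backends {
        A(BackendA),
        B(BackendB),
    }

    impl Phf for Backends {
        fn get_index(&self, key: &[u8]) -> Option<u64> {
            match self {
                Self::A(f) => f.get_index(key),
                Self::B(f) => f.get_index(key),
            }
        }
    }

    fn main() {
        let backend = Backends::B(BackendB);
        assert_eq!(backend.get_index(b"key"), Some(1));
    }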