From 027fc2bbf29715fa796a125a859383dfdf68a52f Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Wed, 23 Nov 2022 12:46:45 +0800 Subject: [PATCH] feat(db): derive `Compact` codec (#177) --- .github/scripts/fuzz.sh | 16 + .github/workflows/ci.yml | 7 + Cargo.lock | 29 ++ crates/codecs/Cargo.toml | 10 +- crates/codecs/derive/Cargo.toml | 5 +- crates/codecs/derive/src/compact/enums.rs | 120 +++++ crates/codecs/derive/src/compact/flags.rs | 133 ++++++ crates/codecs/derive/src/compact/generator.rs | 121 +++++ crates/codecs/derive/src/compact/mod.rs | 276 ++++++++++++ crates/codecs/derive/src/compact/structs.rs | 114 +++++ crates/codecs/derive/src/lib.rs | 21 + crates/codecs/src/lib.rs | 425 ++++++++++++++++++ crates/db/benches/encoding_crit.rs | 8 +- crates/db/benches/encoding_iai.rs | 4 +- crates/interfaces/Cargo.toml | 1 + crates/interfaces/src/db/codecs/compact.rs | 36 ++ crates/interfaces/src/db/codecs/fuzz/mod.rs | 3 +- crates/interfaces/src/db/codecs/mod.rs | 1 + crates/interfaces/src/db/codecs/scale.rs | 36 +- crates/interfaces/src/db/models/accounts.rs | 6 +- crates/interfaces/src/db/models/blocks.rs | 9 +- crates/primitives/Cargo.toml | 6 + crates/primitives/src/account.rs | 29 +- crates/primitives/src/header.rs | 17 +- crates/primitives/src/integer_list.rs | 2 +- crates/primitives/src/log.rs | 6 +- crates/primitives/src/receipt.rs | 8 +- crates/primitives/src/storage.rs | 6 +- .../primitives/src/transaction/access_list.rs | 4 +- crates/primitives/src/transaction/tx_type.rs | 26 +- 30 files changed, 1413 insertions(+), 72 deletions(-) create mode 100755 .github/scripts/fuzz.sh create mode 100644 crates/codecs/derive/src/compact/enums.rs create mode 100644 crates/codecs/derive/src/compact/flags.rs create mode 100644 crates/codecs/derive/src/compact/generator.rs create mode 100644 crates/codecs/derive/src/compact/mod.rs create mode 100644 crates/codecs/derive/src/compact/structs.rs create mode 100644 
crates/interfaces/src/db/codecs/compact.rs diff --git a/.github/scripts/fuzz.sh b/.github/scripts/fuzz.sh new file mode 100755 index 0000000000..9e598d49ac --- /dev/null +++ b/.github/scripts/fuzz.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Runs fuzz tests using `cargo test-fuzz`. These should only be run after a `cargo test` has been executed once: https://github.com/trailofbits/test-fuzz#usage + +PACKAGE=$1 +TEST_TIME=${2:-5} + +# Gets the list of tests present in the package. +TESTS=$(cargo test-fuzz --list -p $PACKAGE | head -n -3 | tail -n+9 | cat - <(echo \"--list\"]) | cat - | jq -r ".[]") + +for test in $TESTS +do + set -x + cargo test-fuzz --no-ui -p "$PACKAGE" $test -- -V $TEST_TIME + set +x +done; \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d7c8334865..43c8f0c10c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,13 @@ jobs: command: nextest args: run --locked --workspace --all-features + - name: Run fuzz tests + run: | + ./.github/scripts/fuzz.sh reth-primitives + ./.github/scripts/fuzz.sh reth-db + ./.github/scripts/fuzz.sh reth-interfaces + ./.github/scripts/fuzz.sh reth-codecs + lint: runs-on: ubuntu-latest steps: diff --git a/Cargo.lock b/Cargo.lock index ede6d87e58..25c2064571 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2280,6 +2280,27 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "modular-bitfield" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a53d79ba8304ac1c4f9eb3b9d281f21f7be9d4626f72ce7df4ad8fbde4f38a74" +dependencies = [ + "modular-bitfield-impl", + "static_assertions", +] + +[[package]] +name = "modular-bitfield-impl" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a7d5f7076603ebc68de2dc6a650ec331a062a13abaa346975be747bbfa4b789" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "more-asserts" version = "0.2.2" 
@@ -3084,7 +3105,12 @@ dependencies = [ name = "reth-codecs" version = "0.1.0" dependencies = [ + "bytes", "codecs-derive", + "ethers-core", + "modular-bitfield", + "serde", + "test-fuzz", ] [[package]] @@ -3254,6 +3280,7 @@ dependencies = [ "bytes", "futures", "heapless", + "modular-bitfield", "parity-scale-codec", "postcard", "rand 0.8.5", @@ -3373,6 +3400,7 @@ dependencies = [ "hex", "hex-literal", "maplit", + "modular-bitfield", "parity-scale-codec", "plain_hasher", "reth-codecs", @@ -3381,6 +3409,7 @@ dependencies = [ "serde", "serde_json", "sucds", + "test-fuzz", "thiserror", "tiny-keccak", "triehash", diff --git a/crates/codecs/Cargo.toml b/crates/codecs/Cargo.toml index 5a1ea47874..45b054a706 100644 --- a/crates/codecs/Cargo.toml +++ b/crates/codecs/Cargo.toml @@ -8,9 +8,17 @@ readme = "README.md" [features] default = ["scale"] +compact = ["codecs-derive/compact"] scale = ["codecs-derive/scale"] postcard = ["codecs-derive/postcard"] no_codec = ["codecs-derive/no_codec"] [dependencies] -codecs-derive = { version = "0.1.0", path = "./derive", default-features = false } \ No newline at end of file +bytes = "1.2.1" +codecs-derive = { version = "0.1.0", path = "./derive", default-features = false } +ethers-core = { git = "https://github.com/gakonst/ethers-rs", default-features = false } + +[dev-dependencies] +serde = "1.0" +modular-bitfield = "0.11.2" +test-fuzz = "3.0.4" \ No newline at end of file diff --git a/crates/codecs/derive/Cargo.toml b/crates/codecs/derive/Cargo.toml index a01f655d3b..f4a88c6a26 100644 --- a/crates/codecs/derive/Cargo.toml +++ b/crates/codecs/derive/Cargo.toml @@ -10,7 +10,7 @@ readme = "../README.md" proc-macro = true [dependencies] -proc-macro2 = "1.0" +proc-macro2 = "1.0.47" quote = "1.0" syn = { version = "1.0", features = ["full"] } @@ -20,6 +20,7 @@ parity-scale-codec = { version = "3.2.1", features = ["derive", "bytes"] } [features] default = ["scale"] +compact = [] scale = [] postcard = [] -no_codec = [] \ No newline at end of 
file +no_codec = [] diff --git a/crates/codecs/derive/src/compact/enums.rs b/crates/codecs/derive/src/compact/enums.rs new file mode 100644 index 0000000000..62c4ac47aa --- /dev/null +++ b/crates/codecs/derive/src/compact/enums.rs @@ -0,0 +1,120 @@ +use super::*; + +#[derive(Debug)] +pub struct EnumHandler<'a> { + current_variant_index: u8, + fields_iterator: std::iter::Peekable>, + enum_lines: Vec, +} + +impl<'a> EnumHandler<'a> { + pub fn new(fields: &'a FieldList) -> Self { + EnumHandler { + current_variant_index: 0u8, + enum_lines: vec![], + fields_iterator: fields.iter().peekable(), + } + } + + pub fn next_field(&mut self) -> Option<&'a FieldTypes> { + self.fields_iterator.next() + } + + pub fn generate_to(mut self, ident: &Ident) -> Vec { + while let Some(field) = self.next_field() { + match field { + // The following method will advance the + // `fields_iterator` by itself and stop right before the next variant. + FieldTypes::EnumVariant(name) => self.to(name, ident), + FieldTypes::EnumUnnamedField(_) => unreachable!(), + FieldTypes::StructField(_) => unreachable!(), + } + } + self.enum_lines + } + + pub fn generate_from(mut self, ident: &Ident) -> Vec { + while let Some(field) = self.next_field() { + match field { + // The following method will advance the + // `fields_iterator` by itself and stop right before the next variant. + FieldTypes::EnumVariant(name) => self.from(name, ident), + FieldTypes::EnumUnnamedField(_) => unreachable!(), + FieldTypes::StructField(_) => unreachable!(), + } + } + self.enum_lines + } + + /// Generates `from_compact` code for an enum variant. + /// + /// `fields_iterator` might look something like \[VariantUnit, VariantUnamedField, Field, + /// VariantUnit...\]. 
+ pub fn from(&mut self, variant_name: &str, ident: &Ident) { + let variant_name = format_ident!("{variant_name}"); + let current_variant_index = self.current_variant_index; + + if let Some(next_field) = self.fields_iterator.peek() { + match next_field { + FieldTypes::EnumUnnamedField(next_ftype) => { + // This variant is of the type `EnumVariant(UnnamedField)` + let field_type = format_ident!("{next_ftype}"); + + // Unamed type + self.enum_lines.push(quote! { + #current_variant_index => { + let mut inner = #field_type::default(); + (inner, buf) = #field_type::from_compact(buf, buf.len()); + #ident::#variant_name(inner) + } + }); + self.fields_iterator.next(); + } + FieldTypes::EnumVariant(_) => self.enum_lines.push(quote! { + #current_variant_index => #ident::#variant_name, + }), + FieldTypes::StructField(_) => unreachable!(), + }; + } else { + // This variant has no fields: Unit type + self.enum_lines.push(quote! { + #current_variant_index => #ident::#variant_name, + }); + } + self.current_variant_index += 1; + } + + /// Generates `to_compact` code for an enum variant. + /// + /// `fields_iterator` might look something like [VariantUnit, VariantUnamedField, Field, + /// VariantUnit...]. + pub fn to(&mut self, variant_name: &str, ident: &Ident) { + let variant_name = format_ident!("{variant_name}"); + let current_variant_index = self.current_variant_index; + + if let Some(next_field) = self.fields_iterator.peek() { + match next_field { + FieldTypes::EnumUnnamedField(_) => { + // Unamed type + self.enum_lines.push(quote! { + #ident::#variant_name(field) => { + field.to_compact(&mut buffer); + #current_variant_index + }, + }); + self.fields_iterator.next(); + } + FieldTypes::EnumVariant(_) => self.enum_lines.push(quote! { + #ident::#variant_name => #current_variant_index, + }), + FieldTypes::StructField(_) => unreachable!(), + }; + } else { + // This variant has no fields: Unit type + self.enum_lines.push(quote! 
{ + #ident::#variant_name => #current_variant_index, + }); + } + self.current_variant_index += 1; + } +} diff --git a/crates/codecs/derive/src/compact/flags.rs b/crates/codecs/derive/src/compact/flags.rs new file mode 100644 index 0000000000..25a7384478 --- /dev/null +++ b/crates/codecs/derive/src/compact/flags.rs @@ -0,0 +1,133 @@ +use super::*; + +/// Generates the flag fieldset struct that is going to be used to store the length of fields and +/// their potential presence. +pub(crate) fn generate_flag_struct(ident: &Ident, fields: &FieldList) -> TokenStream2 { + let is_enum = fields.iter().any(|field| matches!(field, FieldTypes::EnumVariant(_))); + + let flags_ident = format_ident!("{ident}Flags"); + let mut field_flags = vec![]; + + let total_bits = if is_enum { + field_flags.push(quote! { + variant: B8, + }); + 8 + } else { + build_struct_field_flags( + fields + .iter() + .filter_map(|f| { + if let FieldTypes::StructField(f) = f { + return Some(f) + } + None + }) + .collect::>(), + &mut field_flags, + ) + }; + + if total_bits == 0 { + return placeholder_flag_struct(&flags_ident) + } + + let total_bytes = pad_flag_struct(total_bits, &mut field_flags); + + // Provides the number of bytes used to represent the flag struct. + let readable_bytes = vec![ + quote! { + buf.get_u8(), + }; + total_bytes.into() + ]; + + // Generate the flag struct. + quote! { + #[bitfield] + #[derive(Clone, Copy, Debug, Default)] + struct #flags_ident { + #(#field_flags)* + } + + impl #flags_ident { + fn from(mut buf: &[u8]) -> (Self, &[u8]) { + (#flags_ident::from_bytes([ + #(#readable_bytes)* + ]), buf) + } + } + } +} + +/// Builds the flag struct for the user struct fields. +/// +/// Returns the total number of bits necessary. +fn build_struct_field_flags( + fields: Vec<&StructFieldDescriptor>, + field_flags: &mut Vec, +) -> u8 { + let mut total_bits = 0; + + // Find out the adequate bit size for the length of each field, if applicable. 
+ for (name, ftype, is_compact) in fields { + if *is_compact { + if is_flag_type(ftype) { + let name = format_ident!("{name}_len"); + let bitsize = get_bit_size(ftype); + let bsize = format_ident!("B{bitsize}"); + total_bits += bitsize; + + field_flags.push(quote! { + #name: #bsize , + }); + } else { + let name = format_ident!("{name}"); + + field_flags.push(quote! { + #name: bool , + }); + + total_bits += 1; + } + } + } + total_bits +} + +/// Total number of bits should be divisible by 8, so we might need to pad the struct with an unused +/// skipped field. +/// +/// Returns the total number of bytes used by the flags struct. +fn pad_flag_struct(total_bits: u8, field_flags: &mut Vec) -> u8 { + let remaining = 8 - total_bits % 8; + let total_bytes = if remaining != 8 { + let bsize = format_ident!("B{remaining}"); + field_flags.push(quote! { + #[skip] + unused: #bsize , + }); + (total_bits + remaining) / 8 + } else { + total_bits / 8 + }; + total_bytes +} + +/// Placeholder struct for when there are no bitfields to be added. +fn placeholder_flag_struct(flags: &Ident) -> TokenStream2 { + quote! { + #[derive(Debug, Default)] + struct #flags { + } + + impl #flags { + fn from(mut buf: &[u8]) -> (Self, &[u8]) { + (#flags::default(), buf) + } + fn into_bytes(self) -> [u8; 0] { + [] + } + } + } +} diff --git a/crates/codecs/derive/src/compact/generator.rs b/crates/codecs/derive/src/compact/generator.rs new file mode 100644 index 0000000000..027d9d63ef --- /dev/null +++ b/crates/codecs/derive/src/compact/generator.rs @@ -0,0 +1,121 @@ +//! Code generator for the [`Compact`] trait. + +use super::*; + +/// Generates code to implement the [`Compact`] trait for a data type. 
+pub fn generate_from_to(ident: &Ident, fields: &FieldList) -> TokenStream2 { + let flags = format_ident!("{ident}Flags"); + + let to_compact = generate_to_compact(fields, ident); + let from_compact = generate_from_compact(fields, ident); + + let fuzz = format_ident!("fuzz_test_{ident}"); + let test = format_ident!("fuzz_{ident}"); + + // Build function + quote! { + + #[cfg(test)] + #[allow(dead_code)] + #[test_fuzz::test_fuzz] + fn #fuzz(obj: #ident) { + let mut buf = vec![]; + let len = obj.clone().to_compact(&mut buf); + let (same_obj, buf) = #ident::from_compact(buf.as_ref(), len); + assert_eq!(obj, same_obj); + } + + #[test] + pub fn #test() { + #fuzz(#ident::default()) + } + + impl Compact for #ident { + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + let mut flags = #flags::default(); + let mut total_len = 0; + #(#to_compact)* + total_len + } + + fn from_compact(mut buf: &[u8], len: usize) -> (Self, &[u8]) { + let (flags, mut buf) = #flags::from(buf); + #(#from_compact)* + (obj, buf) + } + } + } +} + +/// Generates code to implement the [`Compact`] trait method `from_compact`. +fn generate_from_compact(fields: &FieldList, ident: &Ident) -> Vec { + let mut lines = vec![]; + let known_types = ["H256", "H160", "Address", "Bloom", "Vec"]; + + // let mut handle = FieldListHandler::new(fields); + let is_enum = fields.iter().any(|field| matches!(field, FieldTypes::EnumVariant(_))); + + if is_enum { + let enum_lines = EnumHandler::new(fields).generate_from(ident); + + // Builds the object instantiation. + lines.push(quote! { + let obj = match flags.variant() { + #(#enum_lines)* + _ => unreachable!() + }; + }); + } else { + lines.append(&mut StructHandler::new(fields).generate_from(known_types.as_slice())); + + let fields = fields.iter().filter_map(|field| { + if let FieldTypes::StructField((name, _, _)) = field { + let ident = format_ident!("{name}"); + return Some(quote! { + #ident: #ident, + }) + } + None + }); + + // Builds the object instantiation. 
+ lines.push(quote! { + let obj = #ident { + #(#fields)* + }; + }); + } + + lines +} + +/// Generates code to implement the [`Compact`] trait method `to_compact`. +fn generate_to_compact(fields: &FieldList, ident: &Ident) -> Vec { + let mut lines = vec![quote! { + let mut buffer = bytes::BytesMut::new(); + }]; + + let is_enum = fields.iter().any(|field| matches!(field, FieldTypes::EnumVariant(_))); + + if is_enum { + let enum_lines = EnumHandler::new(fields).generate_to(ident); + + lines.push(quote! { + flags.set_variant(match self { + #(#enum_lines)* + }); + }) + } else { + lines.append(&mut StructHandler::new(fields).generate_to()); + } + + // Places the flag bits. + lines.push(quote! { + let flags = flags.into_bytes(); + total_len += flags.len() + buffer.len(); + buf.put_slice(&flags); + buf.put(buffer); + }); + + lines +} diff --git a/crates/codecs/derive/src/compact/mod.rs b/crates/codecs/derive/src/compact/mod.rs new file mode 100644 index 0000000000..f4ca02683e --- /dev/null +++ b/crates/codecs/derive/src/compact/mod.rs @@ -0,0 +1,276 @@ +extern crate proc_macro2; +use proc_macro::{self, TokenStream}; +use proc_macro2::{Ident, TokenStream as TokenStream2}; +use quote::{format_ident, quote}; +use syn::{parse_macro_input, Data, DeriveInput}; + +mod generator; +use generator::*; + +mod enums; +use enums::*; + +mod flags; +use flags::*; + +mod structs; +use structs::*; + +// Helper Alias type +type IsCompact = bool; +// Helper Alias type +type FieldName = String; +// Helper Alias type +type FieldType = String; +// Helper Alias type +type StructFieldDescriptor = (FieldName, FieldType, IsCompact); +// Helper Alias type +type FieldList = Vec; + +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum FieldTypes { + StructField(StructFieldDescriptor), + EnumVariant(String), + EnumUnnamedField(FieldType), +} + +/// Derives the [`Compact`] trait and its from/to implementations. +pub fn derive(input: TokenStream) -> TokenStream { + let mut output = quote! 
{}; + + let DeriveInput { ident, data, .. } = parse_macro_input!(input); + let fields = get_fields(&data); + output.extend(generate_flag_struct(&ident, &fields)); + output.extend(generate_from_to(&ident, &fields)); + output.into() +} + +/// Given a list of fields on a struct, extract their fields and types. +pub fn get_fields(data: &Data) -> FieldList { + let mut fields = vec![]; + + match data { + Data::Struct(data) => match data.fields { + syn::Fields::Named(ref data_fields) => { + for field in &data_fields.named { + load_field(field, &mut fields, false); + } + assert_eq!(fields.len(), data_fields.named.len()); + } + syn::Fields::Unnamed(ref data_fields) => { + assert!( + data_fields.unnamed.len() == 1, + "Compact only allows one unnamed field. Consider making it a struct." + ); + load_field(&data_fields.unnamed[0], &mut fields, false); + } + syn::Fields::Unit => todo!(), + }, + Data::Enum(data) => { + for variant in &data.variants { + fields.push(FieldTypes::EnumVariant(variant.ident.to_string())); + + match &variant.fields { + syn::Fields::Named(_) => { + panic!("Not allowed to have Enum Variants with multiple named fields. Make it a struct instead.") + } + syn::Fields::Unnamed(data_fields) => { + assert!( + data_fields.unnamed.len() == 1, + "Compact only allows one unnamed field. Consider making it a struct." 
+ ); + load_field(&data_fields.unnamed[0], &mut fields, true); + } + syn::Fields::Unit => (), + } + } + } + Data::Union(_) => todo!(), + } + + fields +} + +fn load_field(field: &syn::Field, fields: &mut FieldList, is_enum: bool) { + if let syn::Type::Path(ref path) = field.ty { + let segments = &path.path.segments; + if !segments.is_empty() { + let mut ftype = String::new(); + for (index, segment) in segments.iter().enumerate() { + ftype.push_str(&segment.ident.to_string()); + if index < segments.len() - 1 { + ftype.push_str("::"); + } + } + + if is_enum { + fields.push(FieldTypes::EnumUnnamedField(ftype.to_string())); + } else { + let should_compact = is_flag_type(&ftype) || + field.attrs.iter().any(|attr| { + attr.path.segments.iter().any(|path| path.ident == "maybe_zero") + }); + + fields.push(FieldTypes::StructField(( + field.ident.as_ref().map(|i| i.to_string()).unwrap_or_default(), + ftype, + should_compact, + ))); + } + } + } +} + +/// Given the field type in a string format, return the amount of bits necessary to save its maximum +/// length. +pub fn get_bit_size(ftype: &str) -> u8 { + if ftype == "u64" || ftype == "BlockNumber" || ftype == "TxNumber" || ftype == "ChainId" { + return 4 + } else if ftype == "TxType" { + return 2 + } else if ftype == "bool" || ftype == "Option" { + return 1 + } else if ftype == "U256" { + return 6 + } + 0 +} + +/// Given the field type in a string format, checks if its type should be added to the +/// StructFlags. +pub fn is_flag_type(ftype: &str) -> bool { + get_bit_size(ftype) > 0 +} + +#[cfg(test)] +mod tests { + use super::*; + use syn::parse2; + + #[test] + fn gen() { + let f_struct = quote! { + #[derive(Debug, PartialEq, Clone)] + pub struct TestStruct { + f_u64: u64, + f_u256: U256, + f_bool_t: bool, + f_bool_f: bool, + f_option_none: Option, + f_option_some: Option, + f_option_some_u64: Option, + f_vec_empty: Vec, + f_vec_some: Vec, + } + }; + + // Generate code that will impl the `Compact` trait. 
+ let mut output = quote! {}; + let DeriveInput { ident, data, .. } = parse2(f_struct).unwrap(); + let fields = get_fields(&data); + output.extend(generate_flag_struct(&ident, &fields)); + output.extend(generate_from_to(&ident, &fields)); + + // Expected output in a TokenStream format. Commas matter! + let should_output = quote! { + #[bitfield] + #[derive(Clone, Copy, Debug, Default)] + struct TestStructFlags { + f_u64_len: B4, + f_u256_len: B6, + f_bool_t_len: B1, + f_bool_f_len: B1, + f_option_none_len: B1, + f_option_some_len: B1, + f_option_some_u64_len: B1, + #[skip] + unused: B1, + } + impl TestStructFlags { + fn from(mut buf: &[u8]) -> (Self, &[u8]) { + ( + TestStructFlags::from_bytes([buf.get_u8(), buf.get_u8(),]), + buf + ) + } + } + #[cfg(test)] + #[allow(dead_code)] + #[test_fuzz::test_fuzz] + fn fuzz_test_TestStruct(obj: TestStruct) { + let mut buf = vec![]; + let len = obj.clone().to_compact(&mut buf); + let (same_obj, buf) = TestStruct::from_compact(buf.as_ref(), len); + assert_eq!(obj, same_obj); + } + #[test] + pub fn fuzz_TestStruct() { + fuzz_test_TestStruct(TestStruct::default()) + } + impl Compact for TestStruct { + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + let mut flags = TestStructFlags::default(); + let mut total_len = 0; + let mut buffer = bytes::BytesMut::new(); + let f_u64_len = self.f_u64.to_compact(&mut buffer); + flags.set_f_u64_len(f_u64_len as u8); + let f_u256_len = self.f_u256.to_compact(&mut buffer); + flags.set_f_u256_len(f_u256_len as u8); + let f_bool_t_len = self.f_bool_t.to_compact(&mut buffer); + flags.set_f_bool_t_len(f_bool_t_len as u8); + let f_bool_f_len = self.f_bool_f.to_compact(&mut buffer); + flags.set_f_bool_f_len(f_bool_f_len as u8); + let f_option_none_len = self.f_option_none.to_compact(&mut buffer); + flags.set_f_option_none_len(f_option_none_len as u8); + let f_option_some_len = self.f_option_some.to_compact(&mut buffer); + flags.set_f_option_some_len(f_option_some_len as u8); + let 
f_option_some_u64_len = self.f_option_some_u64.to_compact(&mut buffer); + flags.set_f_option_some_u64_len(f_option_some_u64_len as u8); + let f_vec_empty_len = self.f_vec_empty.to_compact(&mut buffer); + let f_vec_some_len = self.f_vec_some.to_compact(&mut buffer); + let flags = flags.into_bytes(); + total_len += flags.len() + buffer.len(); + buf.put_slice(&flags); + buf.put(buffer); + total_len + } + fn from_compact(mut buf: &[u8], len: usize) -> (Self, &[u8]) { + let (flags, mut buf) = TestStructFlags::from(buf); + let mut f_u64 = u64::default(); + (f_u64, buf) = u64::from_compact(buf, flags.f_u64_len() as usize); + let mut f_u256 = U256::default(); + (f_u256, buf) = U256::from_compact(buf, flags.f_u256_len() as usize); + let mut f_bool_t = bool::default(); + (f_bool_t, buf) = bool::from_compact(buf, flags.f_bool_t_len() as usize); + let mut f_bool_f = bool::default(); + (f_bool_f, buf) = bool::from_compact(buf, flags.f_bool_f_len() as usize); + let mut f_option_none = Option::default(); + (f_option_none, buf) = Option::from_compact(buf, flags.f_option_none_len() as usize); + let mut f_option_some = Option::default(); + (f_option_some, buf) = Option::from_compact(buf, flags.f_option_some_len() as usize); + let mut f_option_some_u64 = Option::default(); + (f_option_some_u64, buf) = + Option::from_compact(buf, flags.f_option_some_u64_len() as usize); + let mut f_vec_empty = Vec::default(); + (f_vec_empty, buf) = Vec::from_compact(buf, buf.len()); + let mut f_vec_some = Vec::default(); + (f_vec_some, buf) = Vec::from_compact(buf, buf.len()); + let obj = TestStruct { + f_u64: f_u64, + f_u256: f_u256, + f_bool_t: f_bool_t, + f_bool_f: f_bool_f, + f_option_none: f_option_none, + f_option_some: f_option_some, + f_option_some_u64: f_option_some_u64, + f_vec_empty: f_vec_empty, + f_vec_some: f_vec_some, + }; + (obj, buf) + } + } + }; + + assert_eq!(output.to_string(), should_output.to_string()); + } +} diff --git a/crates/codecs/derive/src/compact/structs.rs 
b/crates/codecs/derive/src/compact/structs.rs new file mode 100644 index 0000000000..bbc21206ab --- /dev/null +++ b/crates/codecs/derive/src/compact/structs.rs @@ -0,0 +1,114 @@ +use super::*; + +#[derive(Debug)] +pub struct StructHandler<'a> { + fields_iterator: std::iter::Peekable>, + lines: Vec, +} + +impl<'a> StructHandler<'a> { + pub fn new(fields: &'a FieldList) -> Self { + StructHandler { lines: vec![], fields_iterator: fields.iter().peekable() } + } + + pub fn next_field(&mut self) -> Option<&'a FieldTypes> { + self.fields_iterator.next() + } + + pub fn generate_to(mut self) -> Vec { + while let Some(field) = self.next_field() { + match field { + // The following method will advance the + // `fields_iterator` by itself and stop right before the next variant. + FieldTypes::EnumVariant(_) => unreachable!(), + FieldTypes::EnumUnnamedField(_) => unreachable!(), + FieldTypes::StructField(field_descriptor) => self.to(field_descriptor), + } + } + self.lines + } + + pub fn generate_from(mut self, known_types: &[&str]) -> Vec { + while let Some(field) = self.next_field() { + match field { + // The following method will advance the + // `fields_iterator` by itself and stop right before the next variant. + FieldTypes::EnumVariant(_) => unreachable!(), + FieldTypes::EnumUnnamedField(_) => unreachable!(), + FieldTypes::StructField(field_descriptor) => { + self.from(field_descriptor, known_types) + } + } + } + self.lines + } + + /// Generates `to_compact` code for a struct field. + fn to(&mut self, field_descriptor: &StructFieldDescriptor) { + let (name, ftype, is_compact) = field_descriptor; + + let name = format_ident!("{name}"); + let set_len_method = format_ident!("set_{name}_len"); + let len = format_ident!("{name}_len"); + + // H256 with #[maybe_zero] attribute for example + if *is_compact && !is_flag_type(ftype) { + let itype = format_ident!("{ftype}"); + let set_bool_method = format_ident!("set_{name}"); + self.lines.push(quote! 
{ + if self.#name != #itype::zero() { + flags.#set_bool_method(true); + self.#name.to_compact(&mut buffer); + }; + }); + } else { + self.lines.push(quote! { + let #len = self.#name.to_compact(&mut buffer); + }); + } + if is_flag_type(ftype) { + self.lines.push(quote! { + flags.#set_len_method(#len as u8); + }) + } + } + + /// Generates `from_compact` code for a struct field. + fn from(&mut self, field_descriptor: &StructFieldDescriptor, known_types: &[&str]) { + let (name, ftype, is_compact) = field_descriptor; + + let name = format_ident!("{name}"); + let len = format_ident!("{name}_len"); + + assert!( + known_types.contains(&ftype.as_str()) || + is_flag_type(ftype) || + self.fields_iterator.peek().is_none(), + "{ftype} field should be placed as the last one since it's not known. " + ); + + if ftype == "bytes::Bytes" { + self.lines.push(quote! { + let mut #name = bytes::Bytes::new(); + (#name, buf) = bytes::Bytes::from_compact(buf, buf.len() as usize); + }) + } else { + let ident_type = format_ident!("{ftype}"); + self.lines.push(quote! { + let mut #name = #ident_type::default(); + }); + if !is_flag_type(ftype) { + // It's a type that handles its own length requirements. (h256, Custom, ...) + self.lines.push(quote! { + (#name, buf) = #ident_type::from_compact(buf, buf.len()); + }) + } else if *is_compact { + self.lines.push(quote! 
{ + (#name, buf) = #ident_type::from_compact(buf, flags.#len() as usize); + }); + } else { + todo!() + } + } + } +} diff --git a/crates/codecs/derive/src/lib.rs b/crates/codecs/derive/src/lib.rs index dd9b126025..72199f9de5 100644 --- a/crates/codecs/derive/src/lib.rs +++ b/crates/codecs/derive/src/lib.rs @@ -2,6 +2,13 @@ use proc_macro::{self, TokenStream}; use quote::quote; use syn::{parse_macro_input, DeriveInput}; +mod compact; + +#[proc_macro_derive(Compact, attributes(maybe_zero))] +pub fn derive(input: TokenStream) -> TokenStream { + compact::derive(input) +} + #[proc_macro_attribute] #[rustfmt::skip] #[allow(unreachable_code)] @@ -14,6 +21,9 @@ pub fn main_codec(args: TokenStream, input: TokenStream) -> TokenStream { #[cfg(feature = "no_codec")] return no_codec(args, input); + + #[cfg(feature = "compact")] + return use_compact(args, input); // no features no_codec(args, input) @@ -59,6 +69,17 @@ pub fn use_postcard(_args: TokenStream, input: TokenStream) -> TokenStream { .into() } +#[proc_macro_attribute] +pub fn use_compact(_args: TokenStream, input: TokenStream) -> TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + + quote! { + #[derive(Compact, serde::Serialize, serde::Deserialize)] + #ast + } + .into() +} + #[proc_macro_attribute] pub fn no_codec(_args: TokenStream, input: TokenStream) -> TokenStream { let ast = parse_macro_input!(input as DeriveInput); diff --git a/crates/codecs/src/lib.rs b/crates/codecs/src/lib.rs index 6917463530..5ff3f2ea72 100644 --- a/crates/codecs/src/lib.rs +++ b/crates/codecs/src/lib.rs @@ -1 +1,426 @@ +use bytes::{Buf, Bytes}; pub use codecs_derive::*; +use ethers_core::types::{Bloom, H160, H256, U256}; + +/// Trait that implements the `Compact` codec. +/// +/// When deriving the trait for custom structs, be aware of certain limitations/recommendations: +/// * Works best with structs that only have native types (eg. u64, H256, U256). +/// * Fixed array types (H256, Address, Bloom) are not compacted. 
+/// * Max size of `T` in `Option` or `Vec` shouldn't exceed `0xffff`. +/// * Any `bytes::Bytes` field **should be placed last**. +/// * Any other type which is not known to the derive module **should be placed last**. +/// +/// The last two points make it easier to decode the data without saving the length on the +/// `StructFlags`. It will fail compilation if it's not respected. If they're aliases of known types, +/// add their definitions to `get_bit_size()` or `known_types` in `generator.rs`. +pub trait Compact { + /// Takes a buffer which can be written to. *Ideally*, it returns the length written to. + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize; + /// Takes a buffer which can be read from. Returns the object and `buf` with its internal cursor + /// advanced (eg.`.advance(len)`). + /// + /// `len` can either be the `buf` remaining length, or the length of the compacted type. + /// + /// It will panic if `len` is larger than `buf.len()`. + fn from_compact(buf: &[u8], len: usize) -> (Self, &[u8]) + where + Self: Sized; +} + +impl Compact for u64 { + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + let leading = self.leading_zeros() as usize / 8; + buf.put_slice(&self.to_be_bytes()[leading..]); + 8 - leading + } + + fn from_compact(mut buf: &[u8], len: usize) -> (Self, &[u8]) { + if len > 0 { + let mut arr = [0; 8]; + arr[8 - len..].copy_from_slice(&buf[..len]); + + buf.advance(len); + + return (u64::from_be_bytes(arr), buf) + } + (0, buf) + } +} + +impl Compact for Vec +where + T: Compact + Default, +{ + /// Returns 0 since we won't include it in the `StructFlags`. + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + // TODO: can it be smaller? + buf.put_u16(self.len() as u16); + + for element in self { + // TODO: elias fano? 
+ let mut inner = Vec::with_capacity(32); + buf.put_u16(element.to_compact(&mut inner) as u16); + buf.put_slice(&inner); + } + 0 + } + + fn from_compact(mut buf: &[u8], _: usize) -> (Self, &[u8]) { + let mut list = vec![]; + let length = buf.get_u16(); + for _ in 0..length { + #[allow(unused_assignments)] + let mut element = T::default(); + + let len = buf.get_u16(); + (element, buf) = T::from_compact(buf, len as usize); + + list.push(element); + } + + (list, buf) + } +} + +impl Compact for Option +where + T: Compact + Default, +{ + /// Returns 0 for `None` and 1 for `Some(_)`. + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + if let Some(element) = self { + let mut inner = vec![]; + let len = element.to_compact(&mut inner); + buf.put_u16(len as u16); + buf.put_slice(&inner); + return 1 + } + 0 + } + + fn from_compact(mut buf: &[u8], len: usize) -> (Self, &[u8]) { + if len == 0 { + return (None, buf) + } + + let len = buf.get_u16(); + let (element, buf) = T::from_compact(buf, len as usize); + + (Some(element), buf) + } +} + +impl Compact for U256 { + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + let mut inner = vec![0; 32]; + self.to_big_endian(&mut inner); + let size = 32 - (self.leading_zeros() / 8) as usize; + buf.put_slice(&inner[32 - size..]); + size + } + + fn from_compact(mut buf: &[u8], len: usize) -> (Self, &[u8]) { + if len > 0 { + let mut arr = [0; 32]; + arr[(32 - len)..].copy_from_slice(&buf[..len]); + buf.advance(len); + return (U256::from_big_endian(&arr), buf) + } + + (U256::zero(), buf) + } +} + +impl Compact for Bytes { + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + let len = self.len(); + buf.put(self); + len + } + fn from_compact(mut buf: &[u8], len: usize) -> (Self, &[u8]) { + (buf.copy_to_bytes(len), buf) + } +} + +macro_rules! 
impl_hash_compact { + ($(($name:tt, $size:tt)),+) => { + $( + impl Compact for $name { + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + buf.put_slice(&self.0); + $size + } + + fn from_compact(mut buf: &[u8], len: usize) -> (Self,&[u8]) { + if len == 0 { + return ($name::default(), buf) + } + + let v = $name::from_slice( + buf.get(..$size).expect("size not matching"), + ); + buf.advance($size); + (v, buf) + } + } + )+ + }; +} + +impl_hash_compact!((H256, 32), (H160, 20)); + +impl Compact for Bloom { + fn to_compact(self, buf: &mut impl bytes::BufMut) -> usize { + buf.put_slice(&self.0); + 256 + } + + fn from_compact(mut buf: &[u8], _: usize) -> (Self, &[u8]) { + let result = Bloom::from_slice(&buf[..256]); + buf.advance(256); + (result, buf) + } +} + +impl Compact for bool { + /// `bool` vars go directly to the `StructFlags` and are not written to the buffer. + fn to_compact(self, _: &mut impl bytes::BufMut) -> usize { + self as usize + } + + /// `bool` expects the real value to come in `len`, and does not advance the cursor. + fn from_compact(buf: &[u8], len: usize) -> (Self, &[u8]) { + (len != 0, buf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ethers_core::types::Address; + use modular_bitfield::prelude::*; + + #[test] + fn compact_bytes() { + let arr = [1, 2, 3, 4, 5]; + let list = bytes::Bytes::copy_from_slice(&arr); + let mut buf = vec![]; + assert_eq!(list.clone().to_compact(&mut buf), list.len()); + + // Add some noise data. + buf.push(1); + + assert_eq!(&buf[..arr.len()], &arr); + assert_eq!(bytes::Bytes::from_compact(&buf, list.len()), (list, vec![1].as_slice())); + } + + #[test] + fn compact_bloom() { + let mut buf = vec![]; + assert_eq!(Bloom::default().to_compact(&mut buf), 256); + assert_eq!(buf, vec![0; 256]); + + // Add some noise data. + buf.push(1); + + // Bloom shouldn't care about the len passed, since it's not actually compacted. 
+ assert_eq!(Bloom::from_compact(&buf, 1000), (Bloom::default(), vec![1u8].as_slice())); + } + + #[test] + fn compact_address() { + let mut buf = vec![]; + assert_eq!(Address::zero().to_compact(&mut buf), 20); + assert_eq!(buf, vec![0; 20]); + + // Add some noise data. + buf.push(1); + + // Address shouldn't care about the len passed, since it's not actually compacted. + assert_eq!(Address::from_compact(&buf, 1000), (Address::zero(), vec![1u8].as_slice())); + } + + #[test] + fn compact_h256() { + let mut buf = vec![]; + assert_eq!(H256::zero().to_compact(&mut buf), 32); + assert_eq!(buf, vec![0; 32]); + + // Add some noise data. + buf.push(1); + + // H256 shouldn't care about the len passed, since it's not actually compacted. + assert_eq!(H256::from_compact(&buf, 1000), (H256::zero(), vec![1u8].as_slice())); + } + + #[test] + fn compact_bool() { + let _vtrue = true; + let mut buf = vec![]; + + assert_eq!(true.to_compact(&mut buf), 1); + // Bool vars go directly to the `StructFlags` and are not written to the buf. + assert_eq!(buf.len(), 0); + + assert_eq!(false.to_compact(&mut buf), 0); + assert_eq!(buf.len(), 0); + + let buf = vec![100u8]; + + // Bool expects the real value to come in `len`, and does not advance the cursor. + assert_eq!(bool::from_compact(&buf, 1), (true, buf.as_slice())); + assert_eq!(bool::from_compact(&buf, 0), (false, buf.as_slice())); + } + + #[test] + fn compact_option() { + let opt = Some(H256::zero()); + let mut buf = vec![]; + + assert_eq!(None::.to_compact(&mut buf), 0); + assert_eq!(opt.to_compact(&mut buf), 1); + + assert_eq!(Option::::from_compact(&buf, 1), (opt, vec![].as_slice())); + + // If `None`, it returns the slice at the same cursor position.
+ assert_eq!(Option::::from_compact(&buf, 0), (None, buf.as_slice())); + } + + #[test] + fn compact_vec() { + let list = vec![H256::zero(), H256::zero()]; + let mut buf = vec![]; + + // Vec doesn't return a total length + assert_eq!(list.clone().to_compact(&mut buf), 0); + + // Add some noise data in the end that should be returned by `from_compact`. + buf.extend([1u8, 2]); + + let mut remaining_buf = buf.as_slice(); + remaining_buf.advance(2 + 2 + 32 + 2 + 32); + + assert_eq!(Vec::::from_compact(&buf, 0), (list, remaining_buf)); + assert_eq!(remaining_buf, &[1u8, 2]); + } + + #[test] + fn compact_u256() { + let mut buf = vec![]; + + assert_eq!(U256::zero().to_compact(&mut buf), 0); + assert!(buf.is_empty()); + assert_eq!(U256::from_compact(&buf, 0), (U256::zero(), vec![].as_slice())); + + assert_eq!(U256::from(2).to_compact(&mut buf), 1); + assert_eq!(buf, vec![2u8]); + assert_eq!(U256::from_compact(&buf, 1), (U256::from(2), vec![].as_slice())); + } + + #[test] + fn compact_u64() { + let mut buf = vec![]; + + assert_eq!(0u64.to_compact(&mut buf), 0); + assert!(buf.is_empty()); + assert_eq!(u64::from_compact(&buf, 0), (0u64, vec![].as_slice())); + + assert_eq!(2u64.to_compact(&mut buf), 1); + assert_eq!(buf, vec![2u8]); + assert_eq!(u64::from_compact(&buf, 1), (2u64, vec![].as_slice())); + + let mut buf = vec![]; + + assert_eq!(0xffffffffffffffffu64.to_compact(&mut buf), 8); + assert_eq!(&buf, &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]); + assert_eq!(u64::from_compact(&buf, 8), (0xffffffffffffffffu64, vec![].as_slice())); + } + + #[use_compact] + #[derive(Debug, PartialEq, Clone)] + pub struct TestStruct { + f_u64: u64, + f_u256: U256, + f_bool_t: bool, + f_bool_f: bool, + f_option_none: Option, + f_option_some: Option, + f_option_some_u64: Option, + f_vec_empty: Vec, + f_vec_some: Vec, + } + + impl Default for TestStruct { + fn default() -> Self { + TestStruct { + f_u64: 1u64, // 4 bits | 1 byte + f_u256: 1u64.into(), // 6 bits | 1 byte + f_bool_f: false, 
// 1 bit | 0 bytes + f_bool_t: true, // 1 bit | 0 bytes + f_option_none: None, // 1 bit | 0 bytes + f_option_some: Some(H256::zero()), // 1 bit | 2 + 32 bytes + f_option_some_u64: Some(0xffffu64), // 1 bit | 2 + 2 bytes + f_vec_empty: vec![], // 0 bits | 2 bytes + f_vec_some: vec![H160::zero(), H160::zero()], // 0 bits | 2 + (2+20)*2 bytes + } + } + } + + #[test] + fn compact_test_struct() { + let test = TestStruct::default(); + let mut buf = vec![]; + assert_eq!( + test.to_compact(&mut buf), + 2 + // TestStructFlags + 1 + + 1 + + // 0 + 0 + 0 + + 2 + 32 + + 2 + 2 + + 2 + + 2 + (2 + 20) * 2 + ); + + assert_eq!( + TestStruct::from_compact(&buf, buf.len()), + (TestStruct::default(), vec![].as_slice()) + ); + } + + #[use_compact] + #[derive(Debug, PartialEq, Clone, Default)] + pub enum TestEnum { + #[default] + Var0, + Var1(TestStruct), + Var2(u64), + } + + #[cfg(test)] + #[allow(dead_code)] + #[test_fuzz::test_fuzz] + fn compact_test_enum_all_variants(var0: TestEnum, var1: TestEnum, var2: TestEnum) { + let mut buf = vec![]; + var0.clone().to_compact(&mut buf); + assert_eq!(TestEnum::from_compact(&buf, buf.len()).0, var0); + + let mut buf = vec![]; + var1.clone().to_compact(&mut buf); + assert_eq!(TestEnum::from_compact(&buf, buf.len()).0, var1); + + let mut buf = vec![]; + var2.clone().to_compact(&mut buf); + assert_eq!(TestEnum::from_compact(&buf, buf.len()).0, var2); + } + + #[test] + fn compact_test_enum() { + let var0 = TestEnum::Var0; + let var1 = TestEnum::Var1(TestStruct::default()); + let var2 = TestEnum::Var2(1u64); + + compact_test_enum_all_variants(var0, var1, var2); + } +} diff --git a/crates/db/benches/encoding_crit.rs b/crates/db/benches/encoding_crit.rs index 34272cafa8..76588291b3 100644 --- a/crates/db/benches/encoding_crit.rs +++ b/crates/db/benches/encoding_crit.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -/// Benchmarks the encoding and decoding of `Header` using criterion. 
+/// Benchmarks the encoding and decoding of `IntegerList` using criterion. macro_rules! impl_criterion_encoding_benchmark { ($name:tt) => { pub fn criterion_benchmark(c: &mut Criterion) { @@ -8,9 +8,9 @@ macro_rules! impl_criterion_encoding_benchmark { c.bench_function(stringify!($name), |b| { b.iter(|| { let encoded_size = - reth_interfaces::db::codecs::fuzz::Header::encode_and_decode(black_box( - reth_primitives::Header::default(), - )) + reth_interfaces::db::codecs::fuzz::IntegerList::encode_and_decode( + black_box(reth_primitives::IntegerList::default()), + ) .0; if size == 0 { diff --git a/crates/db/benches/encoding_iai.rs b/crates/db/benches/encoding_iai.rs index 1584dc8507..cfa30ff78a 100644 --- a/crates/db/benches/encoding_iai.rs +++ b/crates/db/benches/encoding_iai.rs @@ -5,8 +5,8 @@ use reth_interfaces::db; macro_rules! impl_iai_encoding_benchmark { ($name:tt) => { fn $name() { - db::codecs::fuzz::Header::encode_and_decode(black_box( - reth_primitives::Header::default(), + db::codecs::fuzz::IntegerList::encode_and_decode(black_box( + reth_primitives::IntegerList::default(), )); } diff --git a/crates/interfaces/Cargo.toml b/crates/interfaces/Cargo.toml index 752fc9efca..ea168bb9ff 100644 --- a/crates/interfaces/Cargo.toml +++ b/crates/interfaces/Cargo.toml @@ -28,6 +28,7 @@ futures = "0.3.25" tokio-stream = "0.1.11" rand = "0.8.5" arbitrary = { version = "1.1.7", features = ["derive"], optional = true} +modular-bitfield = "0.11.2" [dev-dependencies] reth-db = { path = "../db", features = ["test-utils"] } diff --git a/crates/interfaces/src/db/codecs/compact.rs b/crates/interfaces/src/db/codecs/compact.rs new file mode 100644 index 0000000000..8dcebfa803 --- /dev/null +++ b/crates/interfaces/src/db/codecs/compact.rs @@ -0,0 +1,36 @@ +use crate::db::{ + models::{accounts::AccountBeforeTx, StoredBlockBody}, + Compress, Decompress, Error, +}; +use reth_codecs::Compact; +use reth_primitives::*; + +/// Implements compression for Compact type. +macro_rules! 
impl_compression_for_compact { + ($($name:tt),+) => { + $( + impl Compress for $name + { + type Compressed = Vec; + + fn compress(self) -> Self::Compressed { + let mut buf = vec![]; + let _ = Compact::to_compact(self, &mut buf); + buf + } + } + + impl Decompress for $name + { + fn decompress>(value: B) -> Result<$name, Error> { + let value = value.into(); + let (obj, _) = Compact::from_compact(&value, value.len()); + Ok(obj) + } + } + )+ + }; +} + +impl_compression_for_compact!(Header, Account, Log, Receipt, TxType, StorageEntry, StoredBlockBody); +impl_compression_for_compact!(AccountBeforeTx); diff --git a/crates/interfaces/src/db/codecs/fuzz/mod.rs b/crates/interfaces/src/db/codecs/fuzz/mod.rs index 2c3bbfcb31..f18870f366 100644 --- a/crates/interfaces/src/db/codecs/fuzz/mod.rs +++ b/crates/interfaces/src/db/codecs/fuzz/mod.rs @@ -66,6 +66,7 @@ macro_rules! impl_fuzzer_key { /// Fuzzer generates a random instance of the object and proceeds to compress and decompress it. It /// then makes sure that it matches the original object. +#[allow(unused)] macro_rules! impl_fuzzer_value { ($($name:tt),+) => { $( @@ -85,7 +86,5 @@ macro_rules! impl_fuzzer_value_with_input { }; } -impl_fuzzer_value!(Header, Account); - impl_fuzzer_key!(BlockNumHash, TxNumberAddress); impl_fuzzer_value_with_input!((IntegerList, IntegerListInput)); diff --git a/crates/interfaces/src/db/codecs/mod.rs b/crates/interfaces/src/db/codecs/mod.rs index 372ebe7412..6e70ec2094 100644 --- a/crates/interfaces/src/db/codecs/mod.rs +++ b/crates/interfaces/src/db/codecs/mod.rs @@ -1,5 +1,6 @@ //! 
Integrates different codecs into table::Encode and table::Decode +mod compact; pub mod fuzz; mod postcard; #[cfg(not(feature = "bench-postcard"))] diff --git a/crates/interfaces/src/db/codecs/scale.rs b/crates/interfaces/src/db/codecs/scale.rs index ce8eaa13ff..10ecc72bd1 100644 --- a/crates/interfaces/src/db/codecs/scale.rs +++ b/crates/interfaces/src/db/codecs/scale.rs @@ -1,7 +1,4 @@ -use crate::db::{ - models::{accounts::AccountBeforeTx, StoredBlockBody}, - Compress, Decompress, Error, -}; +use crate::db::{Compress, Decompress, Error}; use parity_scale_codec::decode_from_bytes; use reth_primitives::*; @@ -32,18 +29,8 @@ where } } -/// Implements SCALE both for value and key types. -macro_rules! impl_scale { - ($($name:tt),+) => { - $( - impl ScaleValue for $name {} - impl sealed::Sealed for $name {} - )+ - }; -} - -/// Implements SCALE only for value types. -macro_rules! impl_scale_value { +/// Implements compression for SCALE type. +macro_rules! impl_compression_for_scale { ($($name:tt),+) => { $( impl ScaleValue for $name {} @@ -55,17 +42,6 @@ macro_rules! 
impl_scale_value { impl ScaleValue for Vec {} impl sealed::Sealed for Vec {} -impl_scale!(U256, H256, H160); -impl_scale!( - Header, - Account, - Log, - Receipt, - TxType, - StorageEntry, - TransactionSigned, - StoredBlockBody -); -impl_scale!(AccountBeforeTx); - -impl_scale_value!(u8, u32, u16, u64); +impl_compression_for_scale!(U256, H256, H160); +impl_compression_for_scale!(TransactionSigned); +impl_compression_for_scale!(u8, u32, u16, u64); diff --git a/crates/interfaces/src/db/models/accounts.rs b/crates/interfaces/src/db/models/accounts.rs index 692c243e13..3abcbcda78 100644 --- a/crates/interfaces/src/db/models/accounts.rs +++ b/crates/interfaces/src/db/models/accounts.rs @@ -8,13 +8,13 @@ use crate::{ impl_fixed_arbitrary, }; use bytes::Bytes; -use reth_codecs::main_codec; +use reth_codecs::{use_compact, Compact}; use reth_primitives::{Account, Address, TxNumber}; use serde::{Deserialize, Serialize}; /// Account as it is saved inside [`AccountChangeSet`]. [`Address`] is the subkey. -#[main_codec] -#[derive(Debug, Default, Clone)] +#[use_compact] +#[derive(Debug, Default, Clone, PartialEq)] pub struct AccountBeforeTx { /// Address for the account. Acts as `DupSort::SubKey`. address: Address, diff --git a/crates/interfaces/src/db/models/blocks.rs b/crates/interfaces/src/db/models/blocks.rs index a2b0c416ef..5b35108c87 100644 --- a/crates/interfaces/src/db/models/blocks.rs +++ b/crates/interfaces/src/db/models/blocks.rs @@ -7,8 +7,9 @@ use crate::{ }, impl_fixed_arbitrary, }; -use bytes::Bytes; -use reth_codecs::main_codec; +use bytes::{Buf, Bytes}; +use modular_bitfield::prelude::*; +use reth_codecs::{use_compact, Compact}; use reth_primitives::{BlockHash, BlockNumber, Header, TxNumber, H256}; use serde::{Deserialize, Serialize}; @@ -22,8 +23,8 @@ pub type NumTransactions = u64; /// /// The [TxNumber]s for all the transactions in the block are `base_tx_id..(base_tx_id + /// tx_amount)`. 
-#[derive(Debug)] -#[main_codec] +#[derive(Debug, Default, PartialEq, Clone)] +#[use_compact] pub struct StoredBlockBody { /// The ID of the first transaction in the block. pub base_tx_id: TxNumber, diff --git a/crates/primitives/Cargo.toml b/crates/primitives/Cargo.toml index 7bfc359d00..58c0342382 100644 --- a/crates/primitives/Cargo.toml +++ b/crates/primitives/Cargo.toml @@ -40,6 +40,7 @@ sucds = "0.5.0" arbitrary = { version = "1.1.7", features = ["derive"], optional = true } hex = "0.4" hex-literal = "0.3" +modular-bitfield = "0.11.2" derive_more = "0.99" # proof related @@ -52,3 +53,8 @@ hash-db = "0.15" arbitrary = { version = "1.1.7", features = ["derive"] } serde_json = "1.0" hex-literal = "0.3" +test-fuzz = "3.0.4" + +# necessary so we don't hit a "undeclared 'std'": +# https://github.com/foundry-rs/reth/pull/177#discussion_r1021172198 +secp256k1 = "0.24.0" \ No newline at end of file diff --git a/crates/primitives/src/account.rs b/crates/primitives/src/account.rs index f5d667171a..a2b776b520 100644 --- a/crates/primitives/src/account.rs +++ b/crates/primitives/src/account.rs @@ -1,14 +1,17 @@ use crate::{H256, U256}; -use reth_codecs::main_codec; +use bytes::Buf; +use modular_bitfield::prelude::*; +use reth_codecs::{use_compact, Compact}; /// Account saved in database -#[main_codec] +#[use_compact] #[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] pub struct Account { /// Nonce. pub nonce: u64, /// Account balance. pub balance: U256, + #[maybe_zero] /// Hash of the bytecode. 
pub bytecode_hash: Option, } @@ -19,3 +22,25 @@ impl Account { self.bytecode_hash.is_some() } } + +#[cfg(test)] +mod tests { + use crate::Account; + use reth_codecs::Compact; + + #[test] + fn test_account() { + let mut buf = vec![]; + let mut acc = Account::default(); + let len = acc.to_compact(&mut buf); + assert_eq!(len, 2); + + acc.balance = 2.into(); + let len = acc.to_compact(&mut buf); + assert_eq!(len, 3); + + acc.nonce = 2; + let len = acc.to_compact(&mut buf); + assert_eq!(len, 4); + } +} diff --git a/crates/primitives/src/header.rs b/crates/primitives/src/header.rs index 2836f2e1b8..ab4465edd9 100644 --- a/crates/primitives/src/header.rs +++ b/crates/primitives/src/header.rs @@ -2,14 +2,15 @@ use crate::{ proofs::{EMPTY_LIST_HASH, EMPTY_ROOT}, BlockHash, BlockNumber, Bloom, H160, H256, U256, }; -use bytes::{BufMut, BytesMut}; +use bytes::{Buf, BufMut, BytesMut}; use ethers_core::{types::H64, utils::keccak256}; -use reth_codecs::main_codec; +use modular_bitfield::prelude::*; +use reth_codecs::{use_compact, Compact}; use reth_rlp::{length_of_length, Decodable, Encodable}; use std::ops::Deref; /// Block header -#[main_codec] +#[use_compact] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Header { /// The Keccak 256-bit hash of the parent @@ -48,9 +49,6 @@ pub struct Header { /// A scalar value equal to the reasonable output of Unix’s time() at this block’s inception; /// formally Hs. pub timestamp: u64, - /// An arbitrary byte array containing data relevant to this block. This must be 32 bytes or - /// fewer; formally Hx. - pub extra_data: bytes::Bytes, /// A 256-bit hash which, combined with the /// nonce, proves that a sufficient amount of computation has been carried out on this block; /// formally Hm. @@ -65,6 +63,9 @@ pub struct Header { /// above the gas target, and decreasing when blocks are below the gas target. The base fee per /// gas is burned. 
pub base_fee_per_gas: Option, + /// An arbitrary byte array containing data relevant to this block. This must be 32 bytes or + /// fewer; formally Hx. + pub extra_data: bytes::Bytes, } impl Default for Header { @@ -262,13 +263,13 @@ impl SealedHeader { #[cfg(test)] mod tests { - use crate::Address; - use super::{Decodable, Encodable, Header, H256}; + use crate::Address; use ethers_core::{ types::Bytes, utils::hex::{self, FromHex}, }; + use std::str::FromStr; #[test] diff --git a/crates/primitives/src/integer_list.rs b/crates/primitives/src/integer_list.rs index 332df43436..abbc7c3b1f 100644 --- a/crates/primitives/src/integer_list.rs +++ b/crates/primitives/src/integer_list.rs @@ -8,7 +8,7 @@ use sucds::{EliasFano, Searial}; /// Uses EliasFano to hold a list of integers. It provides really good compression with the /// capability to access its elements without decoding it. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct IntegerList(pub EliasFano); impl Deref for IntegerList { diff --git a/crates/primitives/src/log.rs b/crates/primitives/src/log.rs index 992021cc74..52ed0dd86c 100644 --- a/crates/primitives/src/log.rs +++ b/crates/primitives/src/log.rs @@ -1,10 +1,10 @@ use crate::{Address, H256}; -use reth_codecs::main_codec; +use reth_codecs::{use_compact, Compact}; use reth_rlp::{RlpDecodable, RlpEncodable}; /// Ethereum Log -#[main_codec] -#[derive(Clone, Debug, PartialEq, Eq, RlpDecodable, RlpEncodable)] +#[use_compact] +#[derive(Clone, Debug, PartialEq, Eq, RlpDecodable, RlpEncodable, Default)] pub struct Log { /// Contract that emitted this log. 
pub address: Address, diff --git a/crates/primitives/src/receipt.rs b/crates/primitives/src/receipt.rs index c07a7c7afb..436c1abf61 100644 --- a/crates/primitives/src/receipt.rs +++ b/crates/primitives/src/receipt.rs @@ -1,13 +1,13 @@ use crate::{Bloom, Log, TxType}; use bytes::{Buf, BufMut, BytesMut}; +use modular_bitfield::prelude::*; +use reth_codecs::{use_compact, Compact}; use reth_rlp::{length_of_length, Decodable, Encodable}; - -use reth_codecs::main_codec; use std::cmp::Ordering; /// Receipt containing result of transaction execution. -#[main_codec] -#[derive(Clone, Debug, PartialEq, Eq)] +#[use_compact] +#[derive(Clone, Debug, PartialEq, Eq, Default)] pub struct Receipt { /// Receipt type. pub tx_type: TxType, diff --git a/crates/primitives/src/storage.rs b/crates/primitives/src/storage.rs index 28d6cfc28e..ed5c64255f 100644 --- a/crates/primitives/src/storage.rs +++ b/crates/primitives/src/storage.rs @@ -1,9 +1,11 @@ use super::{H256, U256}; -use reth_codecs::main_codec; +use bytes::Buf; +use modular_bitfield::prelude::*; +use reth_codecs::{use_compact, Compact}; /// Account storage entry. #[derive(Debug, Default, Clone, PartialEq, Eq)] -#[main_codec] +#[use_compact] pub struct StorageEntry { /// Storage key. pub key: H256, diff --git a/crates/primitives/src/transaction/access_list.rs b/crates/primitives/src/transaction/access_list.rs index 098b7167cf..b98a515461 100644 --- a/crates/primitives/src/transaction/access_list.rs +++ b/crates/primitives/src/transaction/access_list.rs @@ -1,8 +1,8 @@ +use crate::{Address, H256}; + use reth_codecs::main_codec; use reth_rlp::{RlpDecodable, RlpDecodableWrapper, RlpEncodable, RlpEncodableWrapper}; -use crate::{Address, H256}; - /// A list of addresses and storage keys that the transaction plans to access. /// Accesses outside the list are possible, but become more expensive. 
#[main_codec] diff --git a/crates/primitives/src/transaction/tx_type.rs b/crates/primitives/src/transaction/tx_type.rs index 2c0419d0fa..7a00dff9e0 100644 --- a/crates/primitives/src/transaction/tx_type.rs +++ b/crates/primitives/src/transaction/tx_type.rs @@ -1,13 +1,35 @@ -use reth_codecs::main_codec; +use reth_codecs::{main_codec, Compact}; /// Transaction Type #[main_codec] -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Default)] pub enum TxType { /// Legacy transaction pre EIP-2929 + #[default] Legacy = 0_isize, /// AccessList transaction EIP2930 = 1_isize, /// Transaction with Priority fee EIP1559 = 2_isize, } + +impl Compact for TxType { + fn to_compact(self, _: &mut impl bytes::BufMut) -> usize { + match self { + TxType::Legacy => 0, + TxType::EIP2930 => 1, + _ => 2, + } + } + + fn from_compact(buf: &[u8], identifier: usize) -> (Self, &[u8]) { + ( + match identifier { + 0 => TxType::Legacy, + 1 => TxType::EIP2930, + _ => TxType::EIP1559, + }, + buf, + ) + } +}