From 5488e7b6a4d20e88072953e4ca2432f3c206e72c Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 31 Jan 2019 10:12:43 -0600 Subject: [PATCH] SSZ list Merkle hashing change The current spec is arguably inconsistent, in that if a set of N values gets chunked into M chunks where M is not an exact power of 2, the chunks between M and next_power_of_2(M) are filled with SSZ_CHUNK_SIZE zero bytes each, but the last chunk is not padded, and could be arbitrarily short (eg. if the values are 4 bytes and there are 257 of them, then that gets serialized into eight chunks chunks where the first four are 64 values each, the fifth is 4 bytes corresponding to the last value, and the last three chunks are SSZ_CHUNK_SIZE zero bytes). This PR fills every chunk up to exactly SSZ_CHUNK_SIZE bytes for consistency. --- specs/simple-serialize.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 13cc47299..2296a47c1 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -385,7 +385,7 @@ Return the hash of the serialization of the value. #### List/Vectors -First, we define some helpers and then the Merkle tree function. +First, we define some helpers and then the Merkle tree function. `zpad(input: bytes, length: int) -> bytes` is a helper that extends the given `bytes` value to the desired `length` by adding zero bytes on the right. ```python # Merkle tree hash of a list of homogenous, non-empty items @@ -401,7 +401,7 @@ def merkle_hash(lst): items_per_chunk = SSZ_CHUNK_SIZE // len(lst[0]) # Build a list of chunks based on the number of items in the chunk - chunkz = [b''.join(lst[i:i+items_per_chunk]) for i in range(0, len(lst), items_per_chunk)] + chunkz = [zpad(b''.join(lst[i:i+items_per_chunk]), SSZ_CHUNK_SIZE) for i in range(0, len(lst), items_per_chunk)] else: # Leave large items alone chunkz = lst