From 2605dfba082ed57ad08951ab393483d6bf64a4f4 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Mon, 17 Jun 2019 11:16:00 -0400 Subject: [PATCH 001/130] Updates to SSZ partials --- specs/light_client/merkle_proofs.md | 226 +++++++++++++++++----------- 1 file changed, 140 insertions(+), 86 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index f009d9737..85d859a54 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -64,43 +64,71 @@ y_data_root len(y) We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values, in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) `[0]`, `[1, 2**64-1]`, `[1, 5, 0]`. ```python -def path_to_encoded_form(obj: Any, path: List[Union[str, int]]) -> List[int]: - if len(path) == 0: - return [] - elif isinstance(path[0], "__len__"): - assert len(path) == 1 - return [LENGTH_FLAG] - elif isinstance(path[0], str) and hasattr(obj, "fields"): - return [list(obj.fields.keys()).index(path[0])] + path_to_encoded_form(getattr(obj, path[0]), path[1:]) - elif isinstance(obj, (Vector, List)): - return [path[0]] + path_to_encoded_form(obj[path[0]], path[1:]) +def item_length(typ: Type) -> int: + """ + Returns the number of bytes in a basic type, or 32 (a full hash) for compound types. + """ + if typ == bool: + return 1 + elif issubclass(typ, uint): + return typ.byte_len else: - raise Exception("Unknown type / path") -``` - -We can now define a function `get_generalized_indices(object: Any, path: List[int], root: int=1) -> List[int]` that converts an object and a path to a set of generalized indices (note that for constant-sized objects, there is only one generalized index and it only depends on the path, but for dynamically sized objects the indices may depend on the object itself too). For dynamically-sized objects, the set of indices will have more than one member because of the need to access an array's length to determine the correct generalized index for some array access. 
-
-```python
-def get_generalized_indices(obj: Any, path: List[int], root: int=1) -> List[int]:
-    if len(path) == 0:
-        return [root]
-    elif isinstance(obj, Vector):
-        items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1
-        new_root = root * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk
-        return get_generalized_indices(obj[path[0]], path[1:], new_root)
-    elif isinstance(obj, List) and path[0] == LENGTH_FLAG:
-        return [root * 2 + 1]
-    elif isinstance(obj, List) and isinstance(path[0], int):
-        assert path[0] < len(obj)
-        items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1
-        new_root = root * 2 * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk
-        return [root *2 + 1] + get_generalized_indices(obj[path[0]], path[1:], new_root)
-    elif hasattr(obj, "fields"):
-        field = list(fields.keys())[path[0]]
-        new_root = root * next_power_of_2(len(fields)) + path[0]
-        return get_generalized_indices(getattr(obj, field), path[1:], new_root)
+    return 32
+
+
+def get_elem_type(typ: Type, index: int) -> Type:
+    """
+    Returns the type of the element of an object of the given type with the given index
+    or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`)
+    """
+    return typ.get_fields_dict()[index] if is_container_type(typ) else typ.elem_type
+
+
+def get_chunk_count(typ: Type) -> int:
+    """
+    Returns the number of hashes needed to represent the top-level elements in the given type
+    (eg. `x.foo` or `x[7]` but not `x[7].bar` or `x.foo.baz`). In all cases except lists/vectors
+    of basic types, this is simply the number of top-level elements, as each element gets one
+    hash. For lists/vectors of basic types, it is often fewer because multiple basic elements
+    can be packed into one 32-byte chunk.
+    """
+    if is_basic_type(typ):
+        return 1
+    elif is_list_kind(typ) or is_vector_kind(typ):
+        return (typ.length * item_length(typ.elem_type) + 31) // 32
     else:
-        raise Exception("Unknown type / path")
+        return len(typ.get_fields())
+
+
+def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int]:
+    """
+    Returns three variables: (i) the index of the chunk in which the given element of the item is
+    represented, (ii) the starting byte position, (iii) the ending byte position. For example for
+    a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16)
+    """
+    if is_list_kind(typ) or is_vector_kind(typ):
+        start = index * item_length(typ.elem_type)
+        return start // 32, start % 32, start % 32 + item_length(typ.elem_type)
+    elif is_container_type(typ):
+        return typ.get_field_names().index(index), 0, item_length(get_elem_type(typ, index))
+    else:
+        raise Exception("Only lists/vectors/containers supported")
+
+
+def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> int:
+    """
+    Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for
+    `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree.
+    """
+    root = 1
+    for p in path:
+        assert not is_basic_type(typ)  # If we descend to a basic type, the path cannot continue further
+        if p == '__len__':
+            typ, root = uint256, root * 2 + 1 if is_list_kind(typ) else None
+        else:
+            pos, _, _ = get_item_position(typ, p)
+            root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos
+            typ = get_elem_type(typ, p)
+    return root
 ```
 
 ## Merkle multiproofs
@@ -116,72 +144,98 @@ x x . . . . x * .
are unused nodes, * are used nodes, x are the values we are trying to prove. Notice how despite being a multiproof for 3 values, it requires only 3 auxiliary nodes, only one node more than would be required to prove a single value. Normally the efficiency gains are not quite that extreme, but the savings relative to individual Merkle proofs are still significant. As a rule of thumb, a multiproof for k nodes at the same level of an n-node tree has size `k * (n/k + log(n/k))`.
 
-Here is code for creating and verifying a multiproof. First, a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require:
+First, we provide a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require:
 
 ```python
-def get_proof_indices(tree_indices: List[int]) -> List[int]:
-    # Get all indices touched by the proof
-    maximal_indices = set()
-    for i in tree_indices:
-        x = i
-        while x > 1:
-            maximal_indices.add(x ^ 1)
-            x //= 2
-    maximal_indices = tree_indices + sorted(list(maximal_indices))[::-1]
-    # Get indices that cannot be recalculated from earlier indices
-    redundant_indices = set()
-    proof = []
-    for index in maximal_indices:
-        if index not in redundant_indices:
-            proof.append(index)
-            while index > 1:
-                redundant_indices.add(index)
-                if (index ^ 1) not in redundant_indices:
-                    break
-                index //= 2
-    return [i for i in proof if i not in tree_indices]
+def get_branch_indices(tree_index: int) -> List[int]:
+    """
+    Get the generalized indices of the sister chunks along the path from the chunk with the
+    given tree index to the root.
+    """
+    o = [tree_index ^ 1]
+    while o[-1] > 1:
+        o.append((o[-1] // 2) ^ 1)
+    return o[:-1]
+
+def get_expanded_indices(indices: List[int]) -> List[int]:
+    """
+    Get the generalized indices of all chunks in the tree needed to prove the chunks with the given
+    generalized indices.
+    """
+    branches = set()
+    for index in indices:
+        branches = branches.union(set(get_branch_indices(index) + [index]))
+    return sorted(list([x for x in branches if x*2 not in branches or x*2+1 not in branches]))[::-1]
 ```
 
-Generating a proof is simply a matter of taking the node of the SSZ hash tree with the union of the given generalized indices for each index given by `get_proof_indices`, and outputting the list of nodes in the same order.
+Generating a proof that covers paths `p1 ... pn` is simply a matter of taking the chunks in the SSZ hash tree with generalized indices `get_expanded_indices([p1 ... pn])`.
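A minimal sanity check of the two helpers, assuming a complete depth-3 tree (root at generalized index 1, leaves at indices 8..15); the functions are restated so the snippet runs stand-alone:

```python
from typing import List


def get_branch_indices(tree_index: int) -> List[int]:
    # Sister chunks along the path from the given generalized index up to the root.
    o = [tree_index ^ 1]
    while o[-1] > 1:
        o.append((o[-1] // 2) ^ 1)
    return o[:-1]


def get_expanded_indices(indices: List[int]) -> List[int]:
    # All generalized indices needed to prove the chunks with the given indices.
    branches = set()
    for index in indices:
        branches = branches.union(set(get_branch_indices(index) + [index]))
    return sorted(list([x for x in branches if x * 2 not in branches or x * 2 + 1 not in branches]))[::-1]


# Proving leaf 9 requires the leaf itself plus the sisters of its ancestors:
# sibling 8, then 5 (sister of parent 4) and 3 (sister of grandparent 2).
assert get_branch_indices(9) == [8, 5, 3]
assert get_expanded_indices([9]) == [9, 8, 5, 3]
```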
-Here is the verification function:
+We now provide the bulk of the proving machinery, a function that takes a `{generalized_index: chunk}` map and fills in chunks that can be inferred (inferring the parent by hashing its two children):
 
 ```python
-def verify_multi_proof(root: Bytes32, indices: List[int], leaves: List[Bytes32], proof: List[Bytes32]) -> bool:
-    tree = {}
-    for index, leaf in zip(indices, leaves):
-        tree[index] = leaf
-    for index, proof_item in zip(get_proof_indices(indices), proof):
-        tree[index] = proof_item
-    index_queue = sorted(tree.keys())[:-1]
-    i = 0
-    while i < len(index_queue):
-        index = index_queue[i]
-        if index >= 2 and index ^ 1 in tree:
-            tree[index // 2] = hash(tree[index - index % 2] + tree[index - index % 2 + 1])
-            index_queue.append(index // 2)
-        i += 1
-    return (indices == []) or (1 in tree and tree[1] == root)
+def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]:
+    """
+    Fills in chunks that can be inferred from other chunks. For a set of chunks that constitutes
+    a valid proof, this includes the root (generalized index 1).
+    """
+    objects = {k: v for k, v in objects.items()}
+    keys = sorted(objects.keys())[::-1]
+    pos = 0
+    while pos < len(keys):
+        k = keys[pos]
+        if k in objects and k ^ 1 in objects and k // 2 not in objects:
+            objects[k // 2] = hash(objects[k & -2] + objects[k | 1])
+            keys.append(k // 2)
+        pos += 1
+    # Completeness and consistency check
+    assert 1 in objects
+    for k in objects:
+        if k > 1:
+            assert objects[k // 2] == hash(objects[k & -2] + objects[k | 1])
+    return objects
 ```
 
 ## MerklePartial
 
-We define:
+We define a container that encodes an SSZ partial, and provide the methods for converting it into a `{generalized_index: chunk}` map, from which individual values can then be extracted. To determine the hash tree root of an object represented by an SSZ partial, simply check `decode_ssz_partial(partial)[1]`.
 
 ### `SSZMerklePartial`
 
-
 ```python
-{
-    "root": "bytes32",
-    "indices": ["uint64"],
-    "values": ["bytes32"],
-    "proof": ["bytes32"]
-}
+class SSZMerklePartial(Container):
+    indices: List[uint64, 2**32]
+    chunks: List[Bytes32, 2**32]
 ```
 
-### Proofs for execution
+### `decode_ssz_partial`
 
-We define `MerklePartial(f, arg1, arg2..., focus=0)` as being a `SSZMerklePartial` object wrapping a Merkle multiproof of the set of nodes in the hash tree of the SSZ object `arg[focus]` that is needed to authenticate the parts of the object needed to compute `f(arg1, arg2...)`.
+```python
+def decode_ssz_partial(encoded: SSZMerklePartial) -> Dict[int, Bytes32]:
+    """
+    Decodes an encoded SSZ partial into a generalized index -> chunk map, and verifies hash consistency.
+    """
+    full_indices = get_expanded_indices(encoded.indices)
+    return fill({k: v for k, v in zip(full_indices, encoded.chunks)})
+```
 
-Ideally, any function which accepts an SSZ object should also be able to accept a `SSZMerklePartial` object as a substitute.
+### `extract_value_at_path`
+
+```python
+def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Union[int, str]]) -> Any:
+    """
+    Provides the value of the element in the object represented by the given encoded SSZ partial at
+    the given path. Raises a KeyError if that path is not covered by this SSZ partial.
+    """
+    root = 1
+    for p in path:
+        if p == '__len__':
+            return deserialize_basic(chunks[root * 2 + 1][:8], uint64)
+        if is_list_kind(typ):
+            assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64)
+        pos, start, end = get_item_position(typ, p)
+        root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos
+        typ = get_elem_type(typ, p)
+    return deserialize_basic(chunks[root][start: end], typ)
+```
+
+Here [link TBD] is a python implementation of SSZ partials that represents them as a class that can be read and written to just like the underlying objects, so you can eg. perform state transitions on SSZ partials and compute the resulting root.

From 7e3318318d0d2a534f11cc2d0ff671cfe211fd1f Mon Sep 17 00:00:00 2001
From: vbuterin
Date: Sun, 23 Jun 2019 00:29:45 -0400
Subject: [PATCH 002/130] Updated to newer SSZ

---
 specs/light_client/merkle_proofs.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md
index 85d859a54..b058be7ca 100644
--- a/specs/light_client/merkle_proofs.md
+++ b/specs/light_client/merkle_proofs.md
@@ -94,7 +94,7 @@ def get_chunk_count(typ: Type) -> int:
     """
     if is_basic_type(typ):
         return 1
-    elif is_list_kind(typ) or is_vector_kind(typ):
+    elif issubclass(typ, (List, Vector, Bytes, BytesN)):
         return (typ.length * item_length(typ.elem_type) + 31) // 32
     else:
         return len(typ.get_fields())
@@ -106,7 +106,7 @@ def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int]
     represented, (ii) the starting byte position, (iii) the ending byte position. For example for
     a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16)
     """
-    if is_list_kind(typ) or is_vector_kind(typ):
+    if issubclass(typ, (List, Vector, Bytes, BytesN)):
         start = index * item_length(typ.elem_type)
         return start // 32, start % 32, start % 32 + item_length(typ.elem_type)
     elif is_container_type(typ):
@@ -123,10 +123,10 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> int:
     for p in path:
         assert not is_basic_type(typ)  # If we descend to a basic type, the path cannot continue further
         if p == '__len__':
-            typ, root = uint256, root * 2 + 1 if is_list_kind(typ) else None
+            typ, root = uint256, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None
         else:
             pos, _, _ = get_item_position(typ, p)
-            root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos
+            root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos
         typ = get_elem_type(typ, p)
     return root
 ```
@@ -230,10 +230,10 @@ def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Unio
     for p in path:
         if p == '__len__':
             return deserialize_basic(chunks[root * 2 + 1][:8], uint64)
-        if is_list_kind(typ):
+        if issubclass(typ, (List, Bytes)):
             assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64)
         pos, start, end = get_item_position(typ, p)
-        root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos
+        root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos
         typ = get_elem_type(typ, p)
     return deserialize_basic(chunks[root][start: end], typ)
 ```

From 1644ae61255ce25ddc318a299ab0a67e42b068ee Mon Sep 17 00:00:00 2001
From: Hsiao-Wei Wang
Date: Wed, 10 Jul 2019 16:46:59 +0800
Subject: [PATCH 003/130] Clarify the illegal types

---
 specs/simple-serialize.md | 2 +-
 1
file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 8efd08c0a..0419a524e 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -89,7 +89,7 @@ An SSZ object is called empty (and thus, `is_empty(object)` returns true) if it ### Illegal types -Empty vector types (i.e. `[subtype, 0]` for some `subtype`) are not legal. The `null` type is only legal as the first type in a union subtype (i.e. with type index zero). +The empty `List[subtype, N]`, `Vector[subtype, N]`, `Bitlist[N]`, and `Bitvector[N]` types, where `N == 0` are not legal. The `null` type is only legal as the first type in a union subtype (i.e. with type index zero). ## Serialization From 25ec084cc58e97f9c797068c1cd65013b8b7e0be Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Wed, 10 Jul 2019 17:20:50 +0800 Subject: [PATCH 004/130] To fit in `MAX_TRANSFERS=0` case --- specs/simple-serialize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 0419a524e..db7b5ad75 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -89,7 +89,7 @@ An SSZ object is called empty (and thus, `is_empty(object)` returns true) if it ### Illegal types -The empty `List[subtype, N]`, `Vector[subtype, N]`, `Bitlist[N]`, and `Bitvector[N]` types, where `N == 0` are not legal. The `null` type is only legal as the first type in a union subtype (i.e. with type index zero). +The empty `Vector[subtype, N]` and `Bitvector[N]` types, where `N == 0` are not legal. The `null` type is only legal as the first type in a union subtype (i.e. with type index zero). ## Serialization From 01af3044032735fa9266d08656090f1bc1e09351 Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 20 Jul 2019 02:13:31 +0200 Subject: [PATCH 005/130] =?UTF-8?q?Found=20by=20Cem=20=C3=96zer:=20Ignore?= =?UTF-8?q?=20older=20latest=20messages=20in=20attesting=20balance=20sum,?= =?UTF-8?q?=20instead=20of=20assertion=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- specs/core/0_fork-choice.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/specs/core/0_fork-choice.md b/specs/core/0_fork-choice.md index 9fd8ab53e..00374fee9 100644 --- a/specs/core/0_fork-choice.md +++ b/specs/core/0_fork-choice.md @@ -101,8 +101,12 @@ def get_genesis_store(genesis_state: BeaconState) -> Store: ```python def get_ancestor(store: Store, root: Hash, slot: Slot) -> Hash: block = store.blocks[root] - assert block.slot >= slot - return root if block.slot == slot else get_ancestor(store, block.parent_root, slot) + if block.slot > slot: + return get_ancestor(store, block.parent_root, slot) + elif block.slot == slot: + return root + else: + return Bytes32() # root is older than queried slot: no results. 
``` #### `get_latest_attesting_balance` From 43a0ca4eebf0532a130b8273a3c85e90e845bb34 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Mon, 22 Jul 2019 07:13:07 -0600 Subject: [PATCH 006/130] check bit lengths --- specs/core/0_beacon-chain.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index f0169f1d2..c17815ee8 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -1654,6 +1654,9 @@ def process_attestation(state: BeaconState, attestation: Attestation) -> None: attestation_slot = get_attestation_data_slot(state, data) assert attestation_slot + MIN_ATTESTATION_INCLUSION_DELAY <= state.slot <= attestation_slot + SLOTS_PER_EPOCH + assert len(attestation.aggregation_bits) == len(committee) + assert len(attestation.custody_bits) == len(committee) + pending_attestation = PendingAttestation( data=data, aggregation_bits=attestation.aggregation_bits, From a90d273fbd3bc952892459a03284242de65022c8 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Mon, 22 Jul 2019 07:19:42 -0600 Subject: [PATCH 007/130] fix minor var typo --- specs/validator/0_beacon-chain-validator.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/validator/0_beacon-chain-validator.md b/specs/validator/0_beacon-chain-validator.md index 58d87d450..188a6a291 100644 --- a/specs/validator/0_beacon-chain-validator.md +++ b/specs/validator/0_beacon-chain-validator.md @@ -322,13 +322,13 @@ Set `attestation.data = attestation_data` where `attestation_data` is the `Attes ##### Aggregation bits -- Let `attestation.aggregation_bits` be a `Bitlist[MAX_INDICES_PER_ATTESTATION]` where the bits at the index in the aggregated validator's `committee` is set to `0b1`. +- Let `attestation.aggregation_bits` be a `Bitlist[MAX_VALIDATORS_PER_COMMITTEE]` where the bits at the index in the aggregated validator's `committee` is set to `0b1`. *Note*: Calling `get_attesting_indices(state, attestation.data, attestation.aggregation_bits)` should return a list of length equal to 1, containing `validator_index`. ##### Custody bits -- Let `attestation.custody_bits` be a `Bitlist[MAX_INDICES_PER_ATTESTATION]` filled with zeros of length `len(committee)`. +- Let `attestation.custody_bits` be a `Bitlist[MAX_VALIDATORS_PER_COMMITTEE]` filled with zeros of length `len(committee)`. *Note*: This is a stub for Phase 0. 
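A minimal sketch of the bitfield construction described above, modelling a `Bitlist` as a plain list of booleans; the committee contents and validator index below are hypothetical, chosen only for illustration:

```python
# Hypothetical committee of validator indices (illustration only).
committee = [13, 42, 7, 99]
validator_index = 7  # the single attester we are producing an attestation for

# aggregation_bits: one bit per committee member, with only the bit at our
# position in the committee set to 0b1.
aggregation_bits = [member == validator_index for member in committee]
assert aggregation_bits == [False, False, True, False]

# custody_bits: a Phase 0 stub, all zeros of length len(committee).
custody_bits = [False] * len(committee)
assert len(custody_bits) == len(aggregation_bits) == len(committee)
```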
From f229d21faae0e59e9769ed361ffc39d30be84a3e Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 11 Jul 2019 20:51:34 +0800 Subject: [PATCH 008/130] Bump `py-ssz` to `0.1.0a11` and update the fuzzing tests --- test_libs/pyspec/eth2spec/fuzzing/decoder.py | 6 +++--- test_libs/pyspec/eth2spec/fuzzing/test_decoder.py | 4 +--- test_libs/pyspec/requirements.txt | 2 +- test_libs/pyspec/setup.py | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/test_libs/pyspec/eth2spec/fuzzing/decoder.py b/test_libs/pyspec/eth2spec/fuzzing/decoder.py index ccca17385..e3b4de318 100644 --- a/test_libs/pyspec/eth2spec/fuzzing/decoder.py +++ b/test_libs/pyspec/eth2spec/fuzzing/decoder.py @@ -19,13 +19,13 @@ def translate_typ(typ) -> ssz.BaseSedes: return ssz.Vector(translate_typ(typ.elem_type), typ.length) elif issubclass(typ, spec_ssz.List): # TODO: Make py-ssz List support the new fixed length list - return ssz.List(translate_typ(typ.elem_type)) + return ssz.List(translate_typ(typ.elem_type), typ.length) elif issubclass(typ, spec_ssz.Bitlist): # TODO: Once Bitlist implemented in py-ssz, use appropriate type - return ssz.List(translate_typ(typ.elem_type)) + return ssz.Bitlist(typ.length) elif issubclass(typ, spec_ssz.Bitvector): # TODO: Once Bitvector implemented in py-ssz, use appropriate type - return ssz.Vector(translate_typ(typ.elem_type), typ.length) + return ssz.Bitvector(typ.length) elif issubclass(typ, spec_ssz.boolean): return ssz.boolean elif issubclass(typ, spec_ssz.uint): diff --git a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py index ea1f1d47f..e68133bd6 100644 --- a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py +++ b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py @@ -9,9 +9,7 @@ def test_decoder(): rng = Random(123) # check these types only, Block covers a lot of operation types already. 
- # TODO: Once has Bitlists and Bitvectors, add back - # spec.BeaconState and spec.BeaconBlock - for typ in [spec.IndexedAttestation, spec.AttestationDataAndCustodyBit]: + for typ in [spec.BeaconState, spec.BeaconBlock]: # create a random pyspec value original = random_value.get_random_ssz_object(rng, typ, 100, 10, mode=random_value.RandomizationMode.mode_random, diff --git a/test_libs/pyspec/requirements.txt b/test_libs/pyspec/requirements.txt index 713b4331a..5aeea4cab 100644 --- a/test_libs/pyspec/requirements.txt +++ b/test_libs/pyspec/requirements.txt @@ -3,4 +3,4 @@ eth-typing>=2.1.0,<3.0.0 pycryptodome==3.7.3 py_ecc==1.7.1 dataclasses==0.6 -ssz==0.1.0a10 +ssz==0.1.0a11 diff --git a/test_libs/pyspec/setup.py b/test_libs/pyspec/setup.py index 07e538e80..9bdbdb2e9 100644 --- a/test_libs/pyspec/setup.py +++ b/test_libs/pyspec/setup.py @@ -9,7 +9,7 @@ setup( "eth-typing>=2.1.0,<3.0.0", "pycryptodome==3.7.3", "py_ecc==1.7.1", - "ssz==0.1.0a10", + "ssz==0.1.0a11", "dataclasses==0.6", ] ) From b08ecb018a97a5a50581e4fcac218f7c65f46360 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sat, 13 Jul 2019 12:29:08 +0800 Subject: [PATCH 009/130] Apply suggestions from code review Co-Authored-By: Diederik Loerakker --- test_libs/pyspec/eth2spec/fuzzing/decoder.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/test_libs/pyspec/eth2spec/fuzzing/decoder.py b/test_libs/pyspec/eth2spec/fuzzing/decoder.py index e3b4de318..272ed0c44 100644 --- a/test_libs/pyspec/eth2spec/fuzzing/decoder.py +++ b/test_libs/pyspec/eth2spec/fuzzing/decoder.py @@ -18,13 +18,10 @@ def translate_typ(typ) -> ssz.BaseSedes: elif issubclass(typ, spec_ssz.Vector): return ssz.Vector(translate_typ(typ.elem_type), typ.length) elif issubclass(typ, spec_ssz.List): - # TODO: Make py-ssz List support the new fixed length list return ssz.List(translate_typ(typ.elem_type), typ.length) elif issubclass(typ, spec_ssz.Bitlist): - # TODO: Once Bitlist implemented in py-ssz, use appropriate type return ssz.Bitlist(typ.length) elif issubclass(typ, spec_ssz.Bitvector): - # TODO: Once Bitvector implemented in py-ssz, use appropriate type return ssz.Bitvector(typ.length) elif issubclass(typ, spec_ssz.boolean): return ssz.boolean From db9091f507a8a83a37b0c3ae4c3ecde4e9c68c8c Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sat, 13 Jul 2019 12:31:41 +0800 Subject: [PATCH 010/130] Add `AttestationDataAndCustodyBit` back --- test_libs/pyspec/eth2spec/fuzzing/test_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py index e68133bd6..c38fd69af 100644 --- a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py +++ b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py @@ -9,7 +9,7 @@ def test_decoder(): rng = Random(123) # check these types only, Block covers a lot of operation types already. 
- for typ in [spec.BeaconState, spec.BeaconBlock]: + for typ in [spec.AttestationDataAndCustodyBit, spec.BeaconState, spec.BeaconBlock]: # create a random pyspec value original = random_value.get_random_ssz_object(rng, typ, 100, 10, mode=random_value.RandomizationMode.mode_random, From 82e2c559ca0d382c734b2abcf9f9196c6a491794 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Mon, 15 Jul 2019 11:33:09 +0800 Subject: [PATCH 011/130] ssz==0.1.3 --- test_generators/ssz_generic/requirements.txt | 2 +- test_libs/pyspec/requirements.txt | 2 +- test_libs/pyspec/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test_generators/ssz_generic/requirements.txt b/test_generators/ssz_generic/requirements.txt index dcdb0824f..94c019c4e 100644 --- a/test_generators/ssz_generic/requirements.txt +++ b/test_generators/ssz_generic/requirements.txt @@ -1,4 +1,4 @@ eth-utils==1.6.0 ../../test_libs/gen_helpers ../../test_libs/config_helpers -ssz==0.1.0a2 +ssz==0.1.3 diff --git a/test_libs/pyspec/requirements.txt b/test_libs/pyspec/requirements.txt index 5aeea4cab..480602779 100644 --- a/test_libs/pyspec/requirements.txt +++ b/test_libs/pyspec/requirements.txt @@ -3,4 +3,4 @@ eth-typing>=2.1.0,<3.0.0 pycryptodome==3.7.3 py_ecc==1.7.1 dataclasses==0.6 -ssz==0.1.0a11 +ssz==0.1.3 diff --git a/test_libs/pyspec/setup.py b/test_libs/pyspec/setup.py index 9bdbdb2e9..94575f2a1 100644 --- a/test_libs/pyspec/setup.py +++ b/test_libs/pyspec/setup.py @@ -9,7 +9,7 @@ setup( "eth-typing>=2.1.0,<3.0.0", "pycryptodome==3.7.3", "py_ecc==1.7.1", - "ssz==0.1.0a11", + "ssz==0.1.3", "dataclasses==0.6", ] ) From 62138fed347aed04833e7c25d2a4aa802ae4cfb2 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 25 Jul 2019 17:26:27 +0800 Subject: [PATCH 012/130] Update Illegal types --- specs/simple-serialize.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index e62dbcd7f..cfc1a4aa2 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -75,7 +75,7 @@ For convenience we alias: * `bit` to `boolean` * `byte` to `uint8` (this is a basic type) * `BytesN` to `Vector[byte, N]` (this is *not* a basic type) -* `null`: `{}`, i.e. the empty container +* `null`: `{}` ### Default values @@ -87,7 +87,9 @@ An SSZ object is called empty (and thus, `is_empty(object)` returns true) if it ### Illegal types -The empty `Vector[subtype, N]` and `Bitvector[N]` types, where `N == 0` are not legal. The `null` type is only legal as the first type in a union subtype (i.e. with type index zero). +- Empty vector types (`Vector[type, 0]`, `Bitvector[0]`) are illegal. +- Containers with no fields are illegal. +- The `null` type is only legal as the first type in a union subtype (i.e. with type index zero). 
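A minimal sketch of these legality rules as a predicate, assuming hypothetical dict-based type descriptors rather than any real SSZ implementation's API:

```python
def is_legal(typ: dict) -> bool:
    kind = typ["kind"]
    if kind in ("vector", "bitvector"):
        return typ["length"] > 0  # empty vector types are illegal
    if kind == "container":
        return len(typ["fields"]) > 0  # containers with no fields are illegal
    if kind == "union":
        # `null` is only legal as the first member (type index zero)
        return all(member != "null" for member in typ["members"][1:])
    return True


assert not is_legal({"kind": "vector", "length": 0})
assert not is_legal({"kind": "container", "fields": []})
assert is_legal({"kind": "union", "members": ["null", "uint64"]})
assert not is_legal({"kind": "union", "members": ["uint64", "null"]})
```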
## Serialization From f336e7ffb33e736b6f201d98562c833e1e0b5611 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 25 Jul 2019 17:32:27 +0800 Subject: [PATCH 013/130] Rename `is_empty` to `is_zero` --- scripts/build_spec.py | 2 +- specs/core/1_custody-game.md | 2 +- specs/simple-serialize.md | 6 +++--- test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 8b541ff50..c221a5fba 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -49,7 +49,7 @@ from eth2spec.utils.ssz.ssz_impl import ( hash_tree_root, signing_root, serialize, - is_empty, + is_zero, ) from eth2spec.utils.ssz.ssz_typing import ( bit, boolean, Container, List, Vector, Bytes, uint64, diff --git a/specs/core/1_custody-game.md b/specs/core/1_custody-game.md index f79977442..63900681e 100644 --- a/specs/core/1_custody-game.md +++ b/specs/core/1_custody-game.md @@ -328,7 +328,7 @@ def get_reveal_period(state: BeaconState, validator_index: ValidatorIndex, epoch ```python def replace_empty_or_append(list: MutableSequence[Any], new_element: Any) -> int: for i in range(len(list)): - if is_empty(list[i]): + if is_zero(list[i]): list[i] = new_element return i list.append(new_element) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index cfc1a4aa2..f479c5d00 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -14,7 +14,7 @@ - [Variable-size and fixed-size](#variable-size-and-fixed-size) - [Aliases](#aliases) - [Default values](#default-values) - - [`is_empty`](#is_empty) + - [`is_zero`](#is_zero) - [Illegal types](#illegal-types) - [Serialization](#serialization) - [`uintN`](#uintn) @@ -81,9 +81,9 @@ For convenience we alias: The default value of a type upon initialization is recursively defined using `0` for `uintN`, `False` for `boolean` and the elements of `Bitvector`, and `[]` for lists and `Bitlist`. Unions default to the first type in the union (with type index zero), which is `null` if present in the union. -#### `is_empty` +#### `is_zero` -An SSZ object is called empty (and thus, `is_empty(object)` returns true) if it is equal to the default value for that type. +An SSZ object is called zeroed (and thus, `is_zero(object)` returns true) if it is equal to the default value for that type. ### Illegal types diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py index b005f2456..748386733 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py @@ -33,7 +33,7 @@ def deserialize_basic(value, typ: BasicType): raise Exception(f"Type not supported: {typ}") -def is_empty(obj: SSZValue): +def is_zero(obj: SSZValue): return type(obj).default() == obj From 47714cbf64026ecf74dee52601e9a8c5d7923cc5 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 25 Jul 2019 00:17:43 +0800 Subject: [PATCH 014/130] Add hash_tree_root tests against py-ssz --- test_libs/pyspec/eth2spec/fuzzing/test_decoder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py index c38fd69af..3c3afa975 100644 --- a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py +++ b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py @@ -31,3 +31,4 @@ def test_decoder(): # and see if the hash-tree-root of the original matches the hash-tree-root of the decoded & translated value. 
assert spec_ssz_impl.hash_tree_root(original) == spec_ssz_impl.hash_tree_root(block) + assert spec_ssz_impl.hash_tree_root(original) == block_sedes.get_hash_tree_root(raw_value) From 62772c9e79cea51278ecb16bd931c6e1d3b26498 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 25 Jul 2019 19:41:12 +0800 Subject: [PATCH 015/130] Refactor --- test_libs/pyspec/eth2spec/fuzzing/test_decoder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py index 3c3afa975..77b52e7a2 100644 --- a/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py +++ b/test_libs/pyspec/eth2spec/fuzzing/test_decoder.py @@ -30,5 +30,6 @@ def test_decoder(): block = translate_value(raw_value, typ) # and see if the hash-tree-root of the original matches the hash-tree-root of the decoded & translated value. - assert spec_ssz_impl.hash_tree_root(original) == spec_ssz_impl.hash_tree_root(block) - assert spec_ssz_impl.hash_tree_root(original) == block_sedes.get_hash_tree_root(raw_value) + original_hash_tree_root = spec_ssz_impl.hash_tree_root(original) + assert original_hash_tree_root == spec_ssz_impl.hash_tree_root(block) + assert original_hash_tree_root == block_sedes.get_hash_tree_root(raw_value) From bd69dc7e3cb3db2ba74b9b39afb5f572b30f0d73 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Thu, 25 Jul 2019 12:28:29 -0600 Subject: [PATCH 016/130] add tests for bit lengths --- specs/core/0_beacon-chain.md | 1 + .../test_process_attestation.py | 58 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index c17815ee8..1822ca5a9 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -1654,6 +1654,7 @@ def process_attestation(state: BeaconState, attestation: Attestation) -> None: attestation_slot = get_attestation_data_slot(state, data) assert attestation_slot + MIN_ATTESTATION_INCLUSION_DELAY <= state.slot <= attestation_slot + SLOTS_PER_EPOCH + committee = get_crosslink_committee(state, data.target.epoch, data.crosslink.shard) assert len(attestation.aggregation_bits) == len(committee) assert len(attestation.custody_bits) == len(committee) diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py index ab46a0d8c..39b5d03c2 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py @@ -398,3 +398,61 @@ def test_empty_aggregation_bits(spec, state): sign_attestation(spec, state, attestation) yield from run_attestation_processing(spec, state, attestation) + + +@with_all_phases +@spec_state_test +def test_too_many_aggregation_bits(spec, state): + attestation = get_valid_attestation(spec, state, signed=True) + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + # one too many bits + attestation.aggregation_bits.append(0b0) + + yield from run_attestation_processing(spec, state, attestation, False) + + +@with_all_phases +@spec_state_test +def test_too_few_aggregation_bits(spec, state): + attestation = get_valid_attestation(spec, state) + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + attestation.aggregation_bits = Bitlist[spec.MAX_VALIDATORS_PER_COMMITTEE]( + *([0b1] + [0b0] * (len(attestation.aggregation_bits) - 1))) + + sign_attestation(spec, state, attestation) 
+ + # one too few bits + attestation.aggregation_bits = attestation.aggregation_bits[:-1] + + yield from run_attestation_processing(spec, state, attestation, False) + + +@with_all_phases +@spec_state_test +def test_too_many_custody_bits(spec, state): + attestation = get_valid_attestation(spec, state, signed=True) + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + # one too many bits + attestation.custody_bits.append(0b0) + + yield from run_attestation_processing(spec, state, attestation, False) + + +@with_all_phases +@spec_state_test +def test_too_few_custody_bits(spec, state): + attestation = get_valid_attestation(spec, state) + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + attestation.custody_bits = Bitlist[spec.MAX_VALIDATORS_PER_COMMITTEE]( + *([0b1] + [0b0] * (len(attestation.custody_bits) - 1))) + + sign_attestation(spec, state, attestation) + + # one too few bits + attestation.custody_bits = attestation.custody_bits[:-1] + + yield from run_attestation_processing(spec, state, attestation, False) From 3f75f1e2c36460fe19741b82a36f29199b0c1e50 Mon Sep 17 00:00:00 2001 From: Justin Date: Thu, 25 Jul 2019 19:32:56 +0100 Subject: [PATCH 017/130] Update 0_beacon-chain.md --- specs/core/0_beacon-chain.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 1822ca5a9..2dfc3d570 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -1655,8 +1655,7 @@ def process_attestation(state: BeaconState, attestation: Attestation) -> None: assert attestation_slot + MIN_ATTESTATION_INCLUSION_DELAY <= state.slot <= attestation_slot + SLOTS_PER_EPOCH committee = get_crosslink_committee(state, data.target.epoch, data.crosslink.shard) - assert len(attestation.aggregation_bits) == len(committee) - assert len(attestation.custody_bits) == len(committee) + assert len(attestation.aggregation_bits) == len(attestation.custody_bits) == len(committee) pending_attestation = PendingAttestation( data=data, From 1ec20f6c1f7ee2b30ad0b94179b29b2702dc5ce3 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Thu, 25 Jul 2019 12:59:38 -0600 Subject: [PATCH 018/130] do not reset start_shard until after processing compact root --- specs/core/0_beacon-chain.md | 4 ++-- .../test_process_final_updates.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index f0169f1d2..46702abd2 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -1504,8 +1504,6 @@ def process_final_updates(state: BeaconState) -> None: HALF_INCREMENT = EFFECTIVE_BALANCE_INCREMENT // 2 if balance < validator.effective_balance or validator.effective_balance + 3 * HALF_INCREMENT < balance: validator.effective_balance = min(balance - balance % EFFECTIVE_BALANCE_INCREMENT, MAX_EFFECTIVE_BALANCE) - # Update start shard - state.start_shard = Shard((state.start_shard + get_shard_delta(state, current_epoch)) % SHARD_COUNT) # Set active index root index_epoch = Epoch(next_epoch + ACTIVATION_EXIT_DELAY) index_root_position = index_epoch % EPOCHS_PER_HISTORICAL_VECTOR @@ -1522,6 +1520,8 @@ def process_final_updates(state: BeaconState) -> None: if next_epoch % (SLOTS_PER_HISTORICAL_ROOT // SLOTS_PER_EPOCH) == 0: historical_batch = HistoricalBatch(block_roots=state.block_roots, state_roots=state.state_roots) state.historical_roots.append(hash_tree_root(historical_batch)) + # Update start shard + state.start_shard = Shard((state.start_shard + 
get_shard_delta(state, current_epoch)) % SHARD_COUNT) # Rotate current/previous epoch attestations state.previous_epoch_attestations = state.current_epoch_attestations state.current_epoch_attestations = [] diff --git a/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_final_updates.py b/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_final_updates.py index 58882a44f..385cc289b 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_final_updates.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_final_updates.py @@ -89,3 +89,20 @@ def test_historical_root_accumulator(spec, state): yield from run_process_final_updates(spec, state) assert len(state.historical_roots) == history_len + 1 + + +@with_all_phases +@spec_state_test +def test_compact_committees_root(spec, state): + assert spec.SLOTS_PER_ETH1_VOTING_PERIOD > spec.SLOTS_PER_EPOCH + # skip ahead to the end of the epoch + state.slot = spec.SLOTS_PER_EPOCH - 1 + + next_epoch = spec.get_current_epoch(state) + 1 + + # ensure that order in which items are processed in final_updates + # does not alter the expected_root + expected_root = spec.get_compact_committees_root(state, next_epoch) + yield from run_process_final_updates(spec, state) + + assert state.compact_committees_roots[next_epoch % spec.EPOCHS_PER_HISTORICAL_VECTOR] == expected_root From b73625fbf16330787ac586030c96de54a33b9816 Mon Sep 17 00:00:00 2001 From: protolambda Date: Thu, 25 Jul 2019 23:13:33 +0200 Subject: [PATCH 019/130] update test generation code (work in progress), improve the simplicity of configuration in context of forks, and update docs --- configs/README.md | 36 +++ configs/constant_presets/README.md | 20 -- configs/fork_timelines/README.md | 19 -- configs/fork_timelines/mainnet.yaml | 12 - configs/fork_timelines/testing.yaml | 6 - configs/{constant_presets => }/mainnet.yaml | 0 configs/{constant_presets => }/minimal.yaml | 0 specs/test_formats/README.md | 250 +++++++++---------- test_generators/epoch_processing/main.py | 50 ++-- test_libs/gen_helpers/gen_base/gen_runner.py | 53 ++-- test_libs/gen_helpers/gen_base/gen_suite.py | 22 -- test_libs/gen_helpers/gen_base/gen_typing.py | 32 ++- test_libs/gen_helpers/gen_from_tests/gen.py | 30 ++- 13 files changed, 257 insertions(+), 273 deletions(-) create mode 100644 configs/README.md delete mode 100644 configs/constant_presets/README.md delete mode 100644 configs/fork_timelines/README.md delete mode 100644 configs/fork_timelines/mainnet.yaml delete mode 100644 configs/fork_timelines/testing.yaml rename configs/{constant_presets => }/mainnet.yaml (100%) rename configs/{constant_presets => }/minimal.yaml (100%) delete mode 100644 test_libs/gen_helpers/gen_base/gen_suite.py diff --git a/configs/README.md b/configs/README.md new file mode 100644 index 000000000..8adb939c8 --- /dev/null +++ b/configs/README.md @@ -0,0 +1,36 @@ +# Configs + +This directory contains a set of constants presets used for testing, testnets, and mainnet. + +A preset file contains all the constants known for its target. +Later-fork constants can be ignored, e.g. ignore phase1 constants as a client that only supports phase 0 currently. + + +## Forking + +Configs are not replaced, but extended with forks. This is to support syncing from one state to the other over a fork boundary, without hot-swapping a config. +Instead, for forks that introduce changes in a constant, the constant name is prefixed with a short abbreviation of the fork. 
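A minimal sketch of the prefixing scheme, assuming a hypothetical `P1_` fork abbreviation and made-up values (neither is taken from the real configs):

```python
config = {
    "SHARD_COUNT": 1024,
    "P1_SHARD_COUNT": 64,  # hypothetical phase 1 override, prefixed with the fork abbreviation
}


def get_constant(config: dict, name: str, fork_prefix: str = "") -> int:
    # Prefer the fork-prefixed value when present; fall back to the base constant.
    return config.get(fork_prefix + name, config[name])


assert get_constant(config, "SHARD_COUNT") == 1024
assert get_constant(config, "SHARD_COUNT", fork_prefix="P1_") == 64
```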
+
+Over time, the need to sync an older state may be deprecated.
+In this case, the prefix on the new constant may be removed, and the old constant will keep a special name before completely being removed.
+
+A previous iteration of forking made use of "timelines", but this collides with the definitions used in the spec (constants for special forking slots etc.),
+ and was not integrated sufficiently in any of the spec tools or implementations.
+Instead, the config now essentially doubles as the fork definition: changing the value of e.g. `PHASE_1_GENESIS_SLOT` changes the fork.
+
+Another reason to prefer forking through constants is the ability to program a forking moment based on context, instead of being limited to a static slot number.
+
+
+## Format
+
+Each preset is a key-value mapping.
+
+**Key**: an `UPPER_SNAKE_CASE` (a.k.a. "macro case") formatted string, name of the constant.
+
+**Value** can be either:
+ - an unsigned integer number, can be up to 64 bits (incl.)
+ - a hexadecimal string, prefixed with `0x`
+
+Presets may contain comments to describe the values.
+
+See [`mainnet.yaml`](./mainnet.yaml) for a complete example.
diff --git a/configs/constant_presets/README.md b/configs/constant_presets/README.md
deleted file mode 100644
index 61c9a3a63..000000000
--- a/configs/constant_presets/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# Constant Presets
-
-This directory contains a set of constants presets used for testing, testnets, and mainnet.
-
-A preset file contains all the constants known for its target.
-Later-fork constants can be ignored, e.g. ignore phase1 constants as a client that only supports phase 0 currently.
-
-## Format
-
-Each preset is a key-value mapping.
-
-**Key**: an `UPPER_SNAKE_CASE` (a.k.a. "macro case") formatted string, name of the constant.
-
-**Value** can be either:
- - an unsigned integer number, can be up to 64 bits (incl.)
- - a hexadecimal string, prefixed with `0x`
-
-Presets may contain comments to describe the values.
-
-See [`mainnet.yaml`](./mainnet.yaml) for a complete example.
diff --git a/configs/fork_timelines/README.md b/configs/fork_timelines/README.md
deleted file mode 100644
index da7445767..000000000
--- a/configs/fork_timelines/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Fork timelines
-
-This directory contains a set of fork timelines used for testing, testnets, and mainnet.
-
-A timeline file contains all the forks known for its target.
-Later forks can be ignored, e.g. ignore fork `phase1` as a client that only supports Phase 0 currently.
-
-## Format
-
-Each preset is a key-value mapping.
-
-**Key**: an `lower_snake_case` (a.k.a. "python case") formatted string, name of the fork.
-
-**Value**: an unsigned integer number, epoch number of activation of the fork.
-
-Timelines may contain comments to describe the values.
-
-See [`mainnet.yaml`](./mainnet.yaml) for a complete example.
- diff --git a/configs/fork_timelines/mainnet.yaml b/configs/fork_timelines/mainnet.yaml deleted file mode 100644 index 0bb3c9db1..000000000 --- a/configs/fork_timelines/mainnet.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Mainnet fork timeline - -# Equal to GENESIS_EPOCH -phase0: 67108864 - -# Example 1: -# phase0_funny_fork_name: 67116000 - -# Example 2: -# Should be equal to PHASE_1_FORK_EPOCH -# (placeholder in example value here) -# phase1: 67163000 diff --git a/configs/fork_timelines/testing.yaml b/configs/fork_timelines/testing.yaml deleted file mode 100644 index 957a53b8c..000000000 --- a/configs/fork_timelines/testing.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# Testing fork timeline - -# Equal to GENESIS_EPOCH -phase0: 536870912 - -# No other forks considered in testing yet (to be implemented) diff --git a/configs/constant_presets/mainnet.yaml b/configs/mainnet.yaml similarity index 100% rename from configs/constant_presets/mainnet.yaml rename to configs/mainnet.yaml diff --git a/configs/constant_presets/minimal.yaml b/configs/minimal.yaml similarity index 100% rename from configs/constant_presets/minimal.yaml rename to configs/minimal.yaml diff --git a/specs/test_formats/README.md b/specs/test_formats/README.md index e4f013d8b..196315185 100644 --- a/specs/test_formats/README.md +++ b/specs/test_formats/README.md @@ -5,21 +5,25 @@ This document defines the YAML format and structure used for Eth 2.0 testing. ## Table of contents -- [General test format](#general-test-format) - - [Table of contents](#table-of-contents) - - [About](#about) - - [Test-case formats](#test-case-formats) - - [Glossary](#glossary) - - [Test format philosophy](#test-format-philosophy) - - [Config design](#config-design) - - [Fork config design](#fork-config-design) - - [Test completeness](#test-completeness) - - [Test suite](#test-suite) - - [Config](#config) - - [Fork-timeline](#fork-timeline) - - [Config sourcing](#config-sourcing) - - [Test structure](#test-structure) - - [Note for implementers](#note-for-implementers) +* [About](#about) + + [Test-case formats](#test-case-formats) +* [Glossary](#glossary) +* [Test format philosophy](#test-format-philosophy) + + [Config design](#config-design) + + [Test completeness](#test-completeness) +* [Test structure](#test-structure) + + [`/`](#--config-name---) + + [`/`](#--fork-or-phase-name---) + + [`/`](#--test-runner-name---) + + [`/`](#--test-handler-name---) + + [`/`](#--test-suite-name---) + + [`/`](#--test-case---) + + [``](#--output-part--) + - [Special output parts](#special-output-parts) + * [`meta.yaml`](#-metayaml-) +* [Config](#config) +* [Config sourcing](#config-sourcing) +* [Note for implementers](#note-for-implementers) @@ -42,6 +46,7 @@ Test formats: - [`ssz_static`](./ssz_static/README.md) - More formats are planned, see tracking issues for CI/testing + ## Glossary - `generator`: a program that outputs one or more `suite` files. @@ -59,13 +64,13 @@ Test formats: - `case`: a test case, an entry in the `test_cases` list of a `suite`. A case can be anything in general, but its format should be well-defined in the documentation corresponding to the `type` (and `handler`).\ A test has the same exact configuration and fork context as the other entries in the `case` list of its `suite`. -- `forks_timeline`: a fork timeline definition, a YAML file containing a key for each fork-name, and an epoch number as value. 
+ ## Test format philosophy ### Config design -After long discussion, the following types of configured constants were identified: +The configuration constant types are: - Never changing: genesis data. - Changing, but reliant on old value: e.g. an epoch time may change, but if you want to do the conversion `(genesis data, timestamp) -> epoch number`, you end up needing both constants. @@ -75,26 +80,12 @@ After long discussion, the following types of configured constants were identifi - Changing: there is a very small chance some constant may really be *replaced*. In this off-chance, it is likely better to include it as an additional variable, and some clients may simply stop supporting the old one if they do not want to sync from genesis. + The change of functionality goes through a phase of deprecation of the old constant, and eventually only the new constant is kept around in the config (when old state is not supported anymore). Based on these types of changes, we model the config as a list of key value pairs, that only grows with every fork (they may change in development versions of forks, however; git manages this). With this approach, configurations are backwards compatible (older clients ignore unknown variables) and easy to maintain. -### Fork config design - -There are two types of fork-data: -1) Timeline: When does a fork take place? -2) Coverage: What forks are covered by a test? - -The first is neat to have as a separate form: we prevent duplication, and can run with different presets - (e.g. fork timeline for a minimal local test, for a public testnet, or for mainnet). - -The second does not affect the result of the tests, it just states what is covered by the tests, - so that the right suites can be executed to see coverage for a certain fork. -For some types of tests, it may be beneficial to ensure it runs exactly the same, with any given fork "active". -Test-formats can be explicit on the need to repeat a test with different forks being "active", - but generally tests run only once. - ### Test completeness Tests should be independent of any sync-data. If one wants to run a test, the input data should be available from the YAML. @@ -104,93 +95,66 @@ The aim is to provide clients with a well-defined scope of work to run a particu - Clients that are not complete in functionality can choose to ignore suites that use certain test-runners, or specific handlers of these test-runners. - Clients that are on older versions can test their work based on older releases of the generated tests, and catch up with newer releases when possible. -## Test suite - -``` -title: -- Display name for the test suite -summary: -- Summarizes the test suite -forks_timeline: -- Used to determine the forking timeline -forks: -- Defines the coverage. Test-runner code may decide to re-run with the different forks "activated", when applicable. -config: -- Used to determine which set of constants to run (possibly compile time) with -runner: *MUST be consistent with folder structure* -handler: *MUST be consistent with folder structure* - -test_cases: - ... - -``` - -## Config - -A configuration is a separate YAML file. -Separation of configuration and tests aims to: -- Prevent duplication of configuration -- Make all tests easy to upgrade (e.g. when a new config constant is introduced) -- Clearly define which constants to use -- Shareable between clients, for cross-client short- or long-lived testnets -- Minimize the amounts of different constants permutations to compile as a client. 
- *Note*: Some clients prefer compile-time constants and optimizations. - They should compile for each configuration once, and run the corresponding tests per build target. - -The format is described in [`configs/constant_presets`](../../configs/constant_presets/README.md#format). - - -## Fork-timeline - -A fork timeline is (preferably) loaded in as a configuration object into a client, as opposed to the constants configuration: - - We do not allocate or optimize any code based on epoch numbers. - - When we transition from one fork to the other, it is preferred to stay online. - - We may decide on an epoch number for a fork based on external events (e.g. Eth1 log event); - a client should be able to activate a fork dynamically. - -The format is described in [`configs/fork_timelines`](../../configs/fork_timelines/README.md#format). - -## Config sourcing - -The constants configurations are located in: - -``` -/configs/constant_presets/.yaml -``` - -And copied by CI for testing purposes to: - -``` -/configs/constant_presets/.yaml -``` - - -The fork timelines are located in: - -``` -/configs/fork_timelines/.yaml -``` - -And copied by CI for testing purposes to: - -``` -/configs/fork_timelines/.yaml -``` ## Test structure -To prevent parsing of hundreds of different YAML files to test a specific test type, - or even more specific, just a handler, tests should be structured in the following nested form: - ``` -. <--- root of eth2.0 tests repository -├── bls <--- collection of handler for a specific test-runner, example runner: "bls" -│   ├── verify_msg <--- collection of test suites for a specific handler, example handler: "verify_msg". If no multiple handlers, use a dummy folder (e.g. "core"), and specify that in the yaml. -│   │   ├── verify_valid.yml . -│   │   ├── special_cases.yml . a list of test suites -│   │   ├── domains.yml . -│   │   ├── invalid.yml . -│   │   ... <--- more suite files (optional) -│   ... <--- more handlers -... <--- more test types +File path structure: +tests/////// ``` -## Common test-case properties +### `/` + +Configs are upper level. Some clients want to run minimal first, and useful for sanity checks during development too. +As a top level dir, it is not duplicated, and the used config can be copied right into this directory as reference. + +### `/` + +This would be: "phase0", "transferparty", "phase1", etc. Each introduces new tests, but does not copy tests that do not change. +If you like to test phase 1, you run phase 0 tests, with the configuration that includes phase 1 changes. Out of scope for now however. + +### `/` + +The well known bls/shuffling/ssz_static/operations/epoch_processing/etc. Handlers can change the format, but there is a general target to test. + + +### `/` + +Specialization within category. All suites in here will have the same test case format. + +### `/` + +Suites are split up. Suite size does not change memory bounds, and makes lookups of particular tests fast to find and load. + +### `/` + +Cases are split up too. This enables diffing of parts of the test case, tracking changes per part, while still using LFS. Also enables different formats for some parts. + +### `` + +E.g. `pre.yaml`, `deposit.yaml`, `post.yaml`. + +Diffing a `pre.yaml` and `post.yaml` provides all the information for testing, good for readability of the change. +Then the difference between pre and post can be compared to anything that changes the pre state, e.g. `deposit.yaml` + +These files allow for custom formats for some parts of the test. E.g. 
something encoded in SSZ. + +Some yaml files have copies, but formatted as raw SSZ bytes: `pre.ssz`, `deposit.ssz`, `post.ssz`. +The yaml files are intended to be deprecated, and clients should shift to ssz inputs for efficiency. +Deprecation will start once a viewer of SSZ test-cases is in place, to maintain a standard of readable test cases. +This also means that some clients can drop legacy YAML -> JSON/other -> SSZ work-arounds. +(These were implemented to support the uint64 YAML, hex strings, etc. Things that were not idiomatic to their language.) + +Yaml will not be deprecated for tests that do not use SSZ: e.g. shuffling and BLS tests. +In this case, there is no work around for loading necessary anyway, and the size and efficiency of yaml is acceptable. + +#### Special output parts + +##### `meta.yaml` + +If present (it is optional), the test is enhanced with extra data to describe usage. Specialized data is described in the documentation of the specific test format. + +Common data is documented here: Some test-case formats share some common key-value pair patterns, and these are documented here: @@ -203,22 +167,52 @@ bls_setting: int -- optional, can have 3 different values: 2: known as "BLS ignored" - if the test validity is strictly dependent on BLS being OFF ``` + +## Config + +A configuration is a separate YAML file. +Separation of configuration and tests aims to: +- Prevent duplication of configuration +- Make all tests easy to upgrade (e.g. when a new config constant is introduced) +- Clearly define which constants to use +- Shareable between clients, for cross-client short- or long-lived testnets +- Minimize the amounts of different constants permutations to compile as a client. + *Note*: Some clients prefer compile-time constants and optimizations. + They should compile for each configuration once, and run the corresponding tests per build target. +- Includes constants to coordinate forking with. + +The format is described in [`/configs`](../../configs/README.md#format). + + +## Config sourcing + +The constants configurations are located in: + +``` +/configs/.yaml +``` + +And copied by CI for testing purposes to: + +``` +/tests//.yaml +``` + +The first `` is a directory, which contains exactly all tests that make use of the given config. + + ## Note for implementers The basic pattern for test-suite loading and running is: -Iterate suites for given test-type, or sub-type (e.g. `operations > deposits`): -1. Filter test-suite, options: - - Config: Load first few lines, load into YAML, and check `config`, either: - - Pass the suite to the correct compiled target - - Ignore the suite if running tests as part of a compiled target with different configuration - - Load the correct configuration for the suite dynamically before running the suite - - Select by file name - - Filter for specific suites (e.g. for a specific fork) -2. Load the YAML - - Optionally translate the data into applicable naming, e.g. `snake_case` to `PascalCase` -3. Iterate through the `test_cases` -4. Ask test-runner to allocate a new test-case (i.e. objectify the test-case, generalize it with a `TestCase` interface) - Optionally pass raw test-case data to enable dynamic test-case allocation. - 1. Load test-case data into it. - 2. Make the test-case run. +1. For a specific config, load it first (and only need to do so once), + then continue with the tests defined in the config folder. +2. Select a fork. Repeat for each fork if running tests for multiple forks. +3. 
Select the category and specialization of interest (e.g. `operations > deposits`). Again, repeat for each if running all.
+4. Select a test suite. Or repeat for each.
+5. Select a test case. Or repeat for each.
+6. Load the parts of the case, and `meta.yaml` if present.
+7. Run the test, as defined by the test format.
+
+Step 1 may be implemented as a compile-time selection of a configuration, if desired for optimization.
+The base requirement is just to use the same set of constants, independent of the loading process.
diff --git a/test_generators/epoch_processing/main.py b/test_generators/epoch_processing/main.py
index 6a578c598..da41c9e95 100644
--- a/test_generators/epoch_processing/main.py
+++ b/test_generators/epoch_processing/main.py
@@ -14,40 +14,36 @@ from gen_from_tests.gen import generate_from_tests
 from preset_loader import loader
 
 
-def create_suite(transition_name: str, config_name: str, get_cases: Callable[[], Iterable[gen_typing.TestCase]]) \
-        -> Callable[[str], gen_typing.TestSuiteOutput]:
-    def suite_definition(configs_path: str) -> gen_typing.TestSuiteOutput:
+def create_suite(handler_name: str, tests_src, config_name: str) \
+        -> Callable[[str], gen_typing.TestProvider]:
+
+    def prepare_fn(configs_path: str) -> str:
         presets = loader.load_presets(configs_path, config_name)
         spec_phase0.apply_constants_preset(presets)
         spec_phase1.apply_constants_preset(presets)
+        return config_name
 
-        return ("%s_%s" % (transition_name, config_name), transition_name, gen_suite.render_suite(
-            title="%s epoch processing" % transition_name,
-            summary="Test suite for %s type epoch processing" % transition_name,
-            forks_timeline="testing",
-            forks=["phase0"],
-            config=config_name,
-            runner="epoch_processing",
-            handler=transition_name,
-            test_cases=get_cases()))
+    def cases_fn() -> Iterable[gen_typing.TestCase]:
+        return generate_from_tests(
+            runner_name='epoch_processing',
+            handler_name=handler_name,
+            src=tests_src,
+            fork_name='phase0'
+        )
 
-    return suite_definition
+    return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn)
 
 
 if __name__ == "__main__":
     gen_runner.run_generator("epoch_processing", [
-        create_suite('crosslinks', 'minimal', lambda: generate_from_tests(test_process_crosslinks, 'phase0')),
-        create_suite('crosslinks', 'mainnet', lambda: generate_from_tests(test_process_crosslinks, 'phase0')),
-        create_suite('final_updates', 'minimal', lambda: generate_from_tests(test_process_final_updates, 'phase0')),
-        create_suite('final_updates', 'mainnet', lambda: generate_from_tests(test_process_final_updates, 'phase0')),
-        create_suite('justification_and_finalization', 'minimal',
-                     lambda: generate_from_tests(test_process_justification_and_finalization, 'phase0')),
-        create_suite('justification_and_finalization', 'mainnet',
-                     lambda: generate_from_tests(test_process_justification_and_finalization, 'phase0')),
-        create_suite('registry_updates', 'minimal',
-                     lambda: generate_from_tests(test_process_registry_updates, 'phase0')),
-        create_suite('registry_updates', 'mainnet',
-                     lambda: generate_from_tests(test_process_registry_updates, 'phase0')),
-        create_suite('slashings', 'minimal', lambda: generate_from_tests(test_process_slashings, 'phase0')),
-        create_suite('slashings', 'mainnet', lambda: generate_from_tests(test_process_slashings, 'phase0')),
+        create_suite('crosslinks', test_process_crosslinks, 'minimal'),
+        create_suite('crosslinks', test_process_crosslinks, 'mainnet'),
+        create_suite('final_updates', test_process_final_updates, 'minimal'),
+        create_suite('final_updates', 
test_process_final_updates, 'mainnet'),
+        create_suite('justification_and_finalization', test_process_justification_and_finalization, 'minimal'),
+        create_suite('justification_and_finalization', test_process_justification_and_finalization, 'mainnet'),
+        create_suite('registry_updates', test_process_registry_updates, 'minimal'),
+        create_suite('registry_updates', test_process_registry_updates, 'mainnet'),
+        create_suite('slashings', test_process_slashings, 'minimal'),
+        create_suite('slashings', test_process_slashings, 'mainnet'),
     ])
diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py b/test_libs/gen_helpers/gen_base/gen_runner.py
index e36d48b8b..b118f48d9 100644
--- a/test_libs/gen_helpers/gen_base/gen_runner.py
+++ b/test_libs/gen_helpers/gen_base/gen_runner.py
@@ -7,7 +7,7 @@ from ruamel.yaml import (
     YAML,
 )
 
-from gen_base.gen_typing import TestSuiteCreator
+from gen_base.gen_typing import TestProvider
 
 
 def validate_output_dir(path_str):
@@ -46,14 +46,17 @@ def validate_configs_dir(path_str):
     return path
 
 
-def run_generator(generator_name, suite_creators: List[TestSuiteCreator]):
+def run_generator(generator_name, test_providers: Iterable[TestProvider]):
     """
     Implementation for a general test generator.
     :param generator_name: The name of the generator. (lowercase snake_case)
-    :param suite_creators: A list of suite creators, each of these builds a list of test cases.
+    :param test_providers: A list of test providers,
+        each of these returns a callable that returns an iterable of test cases.
+        The call to get the iterable may set global configuration,
+        and the iterable should not be resumed after a pause with a change of that configuration.
     :return:
     """
-
+
     parser = argparse.ArgumentParser(
         prog="gen-" + generator_name,
         description=f"Generate YAML test suite files for {generator_name}",
@@ -92,24 +95,32 @@ def run_generator(generator_name, suite_creators: List[TestSuiteCreator]):
     yaml = YAML(pure=True)
     yaml.default_flow_style = None
 
-    print(f"Generating tests for {generator_name}, creating {len(suite_creators)} test suite files...")
+    print(f"Generating tests into {output_dir}...")
     print(f"Reading config presets and fork timelines from {args.configs_path}")
 
-    for suite_creator in suite_creators:
-        (output_name, handler, suite) = suite_creator(args.configs_path)
-        handler_output_dir = Path(output_dir) / Path(handler)
-        try:
-            if not handler_output_dir.exists():
-                handler_output_dir.mkdir()
-        except FileNotFoundError as e:
-            sys.exit(f'Error when creating handler dir {handler} for test "{suite["title"]}" ({e})')
+    for tprov in test_providers:
+        # loads configuration etc.
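+        # (prepare() applies the chosen configuration globally, so every case yielded
+        # by make_cases() below runs against that same configuration.)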
+ config_name = tprov.prepare(args.configs_path) + for test_case in tprov.make_cases(): + case_dir = Path(output_dir) / Path(config_name) / Path(test_case.fork_name) \ + / Path(test_case.runner_name) / Path(test_case.handler_name) \ + / Path(test_case.suite_name) / Path(test_case.case_name) + print(f'Generating test: {case_dir}') - out_path = handler_output_dir / Path(output_name + '.yaml') + case_dir.mkdir(parents=True, exist_ok=True) - try: - with out_path.open(file_mode) as f: - yaml.dump(suite, f) - except IOError as e: - sys.exit(f'Error when dumping test "{suite["title"]}" ({e})') - - print("done.") + try: + for case_part in test_case.case_fn(): + if case_part.out_kind == "data" or case_part.out_kind == "ssz": + try: + out_path = case_dir / Path(case_part.name + '.yaml') + with out_path.open(file_mode) as f: + yaml.dump(case_part.data, f) + except IOError as e: + sys.exit(f'Error when dumping test "{case_dir}", part "{case_part.name}": {e}') + # if out_kind == "ssz": + # # TODO write SSZ as binary file too. + # out_path = case_dir / Path(name + '.ssz') + except Exception as e: + print(f"ERROR: failed to generate vector(s) for test {case_dir}: {e}") + print(f"completed {generator_name}") diff --git a/test_libs/gen_helpers/gen_base/gen_suite.py b/test_libs/gen_helpers/gen_base/gen_suite.py deleted file mode 100644 index a3f88791f..000000000 --- a/test_libs/gen_helpers/gen_base/gen_suite.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Iterable - -from eth_utils import to_dict -from gen_base.gen_typing import TestCase - - -@to_dict -def render_suite(*, - title: str, summary: str, - forks_timeline: str, forks: Iterable[str], - config: str, - runner: str, - handler: str, - test_cases: Iterable[TestCase]): - yield "title", title - yield "summary", summary - yield "forks_timeline", forks_timeline, - yield "forks", forks - yield "config", config - yield "runner", runner - yield "handler", handler - yield "test_cases", test_cases diff --git a/test_libs/gen_helpers/gen_base/gen_typing.py b/test_libs/gen_helpers/gen_base/gen_typing.py index 011326a69..91c4be74a 100644 --- a/test_libs/gen_helpers/gen_base/gen_typing.py +++ b/test_libs/gen_helpers/gen_base/gen_typing.py @@ -1,14 +1,34 @@ from typing import ( Any, Callable, + Iterable, Dict, Tuple, ) +from collections import namedtuple -TestCase = Dict[str, Any] -TestSuite = Dict[str, Any] -# Tuple: (output name, handler name, suite) -- output name excl. ".yaml" -TestSuiteOutput = Tuple[str, str, TestSuite] -# Args: -TestSuiteCreator = Callable[[str], TestSuiteOutput] +@dataclass +class TestCasePart(object): + name: str # name of the file + out_kind: str # type of data ("data" for generic, "ssz" for SSZ encoded bytes) + data: Any + + +@dataclass +class TestCase(object): + fork_name: str + runner_name: str + handler_name: str + suite_name: str + case_name: str + case_fn: Callable[[], Iterable[TestCasePart]] + + +@dataclass +class TestProvider(object): + # Prepares the context with a configuration, loaded from the given config path. 
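+    # (The configuration is applied as global state; a runner is expected to call
+    # prepare() exactly once before iterating make_cases().)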
+ # fn(config path) => chosen config name + prepare: Callable[[str], str] + # Retrieves an iterable of cases, called after prepare() + make_cases: Callable[[], Iterable[TestCase]] diff --git a/test_libs/gen_helpers/gen_from_tests/gen.py b/test_libs/gen_helpers/gen_from_tests/gen.py index 3810c385e..cc64fbf41 100644 --- a/test_libs/gen_helpers/gen_from_tests/gen.py +++ b/test_libs/gen_helpers/gen_from_tests/gen.py @@ -1,26 +1,32 @@ from inspect import getmembers, isfunction -def generate_from_tests(src, phase, bls_active=True): +from gen_base.gen_typing import TestCase + + +def generate_from_tests(runner_name: str, handler_name: str, src: Any, + fork_name: str, bls_active: bool = True) -> Iterable[TestCase]: """ Generate a list of test cases by running tests from the given src in generator-mode. + :param runner_name: to categorize the test in general as. + :param handler_name: to categorize the test specialization as. :param src: to retrieve tests from (discovered using inspect.getmembers). - :param phase: to run tests against particular phase. + :param fork_name: to run tests against particular phase and/or fork. + (if multiple forks are applicable, indicate the last fork) :param bls_active: optional, to override BLS switch preference. Defaults to True. - :return: the list of test cases. + :return: an iterable of test cases. """ fn_names = [ name for (name, _) in getmembers(src, isfunction) if name.startswith('test_') ] - out = [] print("generating test vectors from tests source: %s" % src.__name__) for name in fn_names: tfn = getattr(src, name) - try: - test_case = tfn(generator_mode=True, phase=phase, bls_active=bls_active) - # If no test case data is returned, the test is ignored. - if test_case is not None: - out.append(test_case) - except AssertionError: - print("ERROR: failed to generate vector from test: %s (src: %s)" % (name, src.__name__)) - return out + yield TestCase( + fork_name=fork_name, + runner_name=runner_name, + handler_name=handler_name, + suite_name='pyspec_tests', + case_name=name, + case_fn=lambda: tfn(generator_mode=True, phase=phase, bls_active=bls_active) + ) From 69052ac75080db77169632af9cd91006df45038c Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 26 Jul 2019 19:19:36 +0200 Subject: [PATCH 020/130] Update testgen code, and if force is not on, test generation won't run if it already exists. 
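As an aside, the loading pattern described under "Note for implementers" above can be sketched as follows. This is an editorial illustration, not part of the patch series: the `run_case` callback, the `subdirs` helper, and the use of `ruamel.yaml` are assumptions, not an API any client ships.

```python
from pathlib import Path
from ruamel.yaml import YAML


def run_all_tests(tests_root: str, config_name: str, run_case) -> None:
    yaml = YAML(typ='base')

    def subdirs(d: Path):
        # skip loose files, e.g. the config yaml copied into the config dir as reference
        return sorted(p for p in d.iterdir() if p.is_dir())

    config_dir = Path(tests_root) / config_name          # step 1: one config, loaded once
    for fork_dir in subdirs(config_dir):                 # step 2: repeat per fork
        for runner_dir in subdirs(fork_dir):             # step 3: category (runner) ...
            for handler_dir in subdirs(runner_dir):      # ... and specialization (handler)
                for suite_dir in subdirs(handler_dir):   # step 4: suites
                    for case_dir in subdirs(suite_dir):  # step 5: cases
                        # step 6: load the output parts; meta.yaml, if present, is one of them
                        parts = {p.stem: yaml.load(p) for p in sorted(case_dir.glob('*.yaml'))}
                        # step 7: run the test as defined by the test format
                        run_case(fork_dir.name, runner_dir.name, handler_dir.name,
                                 suite_dir.name, case_dir.name, parts)
```

Clients are of course free to filter at any level, e.g. run only the `minimal` config, a single fork, or a single handler.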
---
 test_libs/gen_helpers/gen_base/gen_runner.py | 70 +++++++++-------
 test_libs/gen_helpers/gen_base/gen_typing.py | 14 ++--
 test_libs/gen_helpers/gen_from_tests/gen.py  | 10 ++-
 test_libs/pyspec/eth2spec/test/context.py    |  7 +-
 test_libs/pyspec/eth2spec/test/utils.py      | 88 ++++++++++----------
 5 files changed, 102 insertions(+), 87 deletions(-)

diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py b/test_libs/gen_helpers/gen_base/gen_runner.py
index b118f48d9..9a2d26664 100644
--- a/test_libs/gen_helpers/gen_base/gen_runner.py
+++ b/test_libs/gen_helpers/gen_base/gen_runner.py
@@ -31,18 +31,6 @@ def validate_configs_dir(path_str):
     if not path.is_dir():
         raise argparse.ArgumentTypeError("Config path must lead to a directory")
 
-    if not Path(path, "constant_presets").exists():
-        raise argparse.ArgumentTypeError("Constant Presets directory must exist")
-
-    if not Path(path, "constant_presets").is_dir():
-        raise argparse.ArgumentTypeError("Constant Presets path must lead to a directory")
-
-    if not Path(path, "fork_timelines").exists():
-        raise argparse.ArgumentTypeError("Fork Timelines directory must exist")
-
-    if not Path(path, "fork_timelines").is_dir():
-        raise argparse.ArgumentTypeError("Fork Timelines path must lead to a directory")
-
     return path
 
 
@@ -56,7 +44,7 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
         and the iterable should not be resumed after a pause with a change of that configuration.
     :return:
     """
-
+
     parser = argparse.ArgumentParser(
         prog="gen-" + generator_name,
         description=f"Generate YAML test suite files for {generator_name}",
@@ -74,7 +62,7 @@
         "--force",
         action="store_true",
         default=False,
-        help="if set overwrite test files if they exist",
+        help="if set re-generate and overwrite test files if they already exist",
     )
     parser.add_argument(
         "-c",
@@ -102,25 +90,43 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
         # loads configuration etc.
         config_name = tprov.prepare(args.configs_path)
         for test_case in tprov.make_cases():
-        case_dir = Path(output_dir) / Path(config_name) / Path(test_case.fork_name) \
-            / Path(test_case.runner_name) / Path(test_case.handler_name) \
-            / Path(test_case.suite_name) / Path(test_case.case_name)
-        print(f'Generating test: {case_dir}')
+        case_dir = Path(output_dir) / Path(config_name) / Path(test_case.fork_name) \
+                   / Path(test_case.runner_name) / Path(test_case.handler_name) \
+                   / Path(test_case.suite_name) / Path(test_case.case_name)
+        if case_dir.exists():
+            if not args.force:
+                print(f'Skipping already existing test: {case_dir}')
+                continue
+            print(f'Warning, output directory {case_dir} already exists,'
+                  f' old files are not deleted but will be overwritten when a new version is produced')
+
+        print(f'Generating test: {case_dir}')
+        try:
             case_dir.mkdir(parents=True, exist_ok=True)
+            meta = dict()
+            for (name, out_kind, data) in test_case.case_fn():
+                if out_kind == "meta":
+                    meta[name] = data
+                elif out_kind == "data" or out_kind == "ssz":
+                    try:
+                        out_path = case_dir / Path(name + '.yaml')
+                        with out_path.open(file_mode) as f:
+                            yaml.dump(data, f)
+                    except IOError as e:
+                        sys.exit(f'Error when dumping test "{case_dir}", part "{name}": {e}')
+                # if out_kind == "ssz":
+                #     # TODO write SSZ as binary file too.
+                #     out_path = case_dir / Path(name + '.ssz')
+            # Once all meta data is collected (if any), write it to a meta data file.
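+            # (Parts yielded with out_kind "meta" were skipped in the loop above,
+            # so the meta.yaml written here is the only place they end up.)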
+            if len(meta) != 0:
+                try:
+                    out_path = case_dir / Path('meta.yaml')
+                    with out_path.open(file_mode) as f:
+                        yaml.dump(meta, f)
+                except IOError as e:
+                    sys.exit(f'Error when dumping test "{case_dir}" meta data: {e}')
 
-        try:
-            for case_part in test_case.case_fn():
-                if case_part.out_kind == "data" or case_part.out_kind == "ssz":
-                    try:
-                        out_path = case_dir / Path(case_part.name + '.yaml')
-                        with out_path.open(file_mode) as f:
-                            yaml.dump(case_part.data, f)
-                    except IOError as e:
-                        sys.exit(f'Error when dumping test "{case_dir}", part "{case_part.name}": {e}')
-                # if out_kind == "ssz":
-                #     # TODO write SSZ as binary file too.
-                #     out_path = case_dir / Path(name + '.ssz')
-        except Exception as e:
-            print(f"ERROR: failed to generate vector(s) for test {case_dir}: {e}")
+        except Exception as e:
+            print(f"ERROR: failed to generate vector(s) for test {case_dir}: {e}")
 
     print(f"completed {generator_name}")
diff --git a/test_libs/gen_helpers/gen_base/gen_typing.py b/test_libs/gen_helpers/gen_base/gen_typing.py
index 91c4be74a..97ddfa713 100644
--- a/test_libs/gen_helpers/gen_base/gen_typing.py
+++ b/test_libs/gen_helpers/gen_base/gen_typing.py
@@ -2,17 +2,19 @@ from typing import (
     Any,
     Callable,
     Iterable,
+    NewType,
     Dict,
     Tuple,
 )
 from collections import namedtuple
 
-
-@dataclass
-class TestCasePart(object):
-    name: str  # name of the file
-    out_kind: str  # type of data ("data" for generic, "ssz" for SSZ encoded bytes)
-    data: Any
+# Elements: name, out_kind, data
+#
+# out_kind is the type of data:
+#  - "data" for generic
+#  - "ssz" for SSZ encoded bytes
+#  - "meta" for generic data to collect into a meta data dict.
+TestCasePart = NewType("TestCasePart", Tuple[str, str, Any])
 
 
 @dataclass
diff --git a/test_libs/gen_helpers/gen_from_tests/gen.py b/test_libs/gen_helpers/gen_from_tests/gen.py
index cc64fbf41..22496de6b 100644
--- a/test_libs/gen_helpers/gen_from_tests/gen.py
+++ b/test_libs/gen_helpers/gen_from_tests/gen.py
@@ -22,11 +22,17 @@ def generate_from_tests(runner_name: str, handler_name: str, src: Any,
     print("generating test vectors from tests source: %s" % src.__name__)
     for name in fn_names:
         tfn = getattr(src, name)
+
+        # strip off the `test_`
+        case_name = name
+        if case_name.startswith('test_'):
+            case_name = case_name[5:]
+
         yield TestCase(
             fork_name=fork_name,
             runner_name=runner_name,
             handler_name=handler_name,
             suite_name='pyspec_tests',
-            case_name=name,
-            case_fn=lambda: tfn(generator_mode=True, phase=phase, bls_active=bls_active)
+            case_name=case_name,
+            case_fn=lambda: tfn(generator_mode=True, fork_name=fork_name, bls_active=bls_active)
         )
diff --git a/test_libs/pyspec/eth2spec/test/context.py b/test_libs/pyspec/eth2spec/test/context.py
index e7560afc6..2adb76da0 100644
--- a/test_libs/pyspec/eth2spec/test/context.py
+++ b/test_libs/pyspec/eth2spec/test/context.py
@@ -28,7 +28,9 @@ DEFAULT_BLS_ACTIVE = False
 
 
 def spectest_with_bls_switch(fn):
-    return bls_switch(spectest()(fn))
+    # Bls switch must be wrapped by spectest,
+    # to fully go through the yielded bls switch data, before setting back the BLS setting.
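+    # (bls_switch is generator-based after this patch: it restores the old BLS flag only
+    # after its `yield from` completes, so the outer spectest must drive the iteration to the end.)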
+ return spectest()(bls_switch(fn)) # shorthand for decorating @with_state @spectest() @@ -88,9 +90,8 @@ def bls_switch(fn): def entry(*args, **kw): old_state = bls.bls_active bls.bls_active = kw.pop('bls_active', DEFAULT_BLS_ACTIVE) - out = fn(*args, **kw) + yield from fn(*args, **kw) bls.bls_active = old_state - return out return entry diff --git a/test_libs/pyspec/eth2spec/test/utils.py b/test_libs/pyspec/eth2spec/test/utils.py index 253691764..4ecabb114 100644 --- a/test_libs/pyspec/eth2spec/test/utils.py +++ b/test_libs/pyspec/eth2spec/test/utils.py @@ -1,87 +1,87 @@ -from typing import Dict, Any, Callable, Iterable +from typing import Dict, Any from eth2spec.debug.encode import encode from eth2spec.utils.ssz.ssz_typing import SSZValue def spectest(description: str = None): + """ + Spectest decorator, should always be the most outer decorator around functions that yield data. + to deal with silent iteration through yielding function when in a pytest context (i.e. not in generator mode). + :param description: Optional description for the test to add to the metadata. + :return: Decorator. + """ def runner(fn): - # this wraps the function, to hide that the function actually is yielding data, instead of returning once. + # this wraps the function, to yield type-annotated entries of data. + # Valid types are: + # - "meta": all key-values with this type can be collected by the generator, to put somewhere together. + # - "ssz": raw SSZ bytes + # - "data": a python structure to be encoded by the user. def entry(*args, **kw): # check generator mode, may be None/else. # "pop" removes it, so it is not passed to the inner function. if kw.pop('generator_mode', False) is True: - out = {} - if description is None: - # fall back on function name for test description - name = fn.__name__ - if name.startswith('test_'): - name = name[5:] - out['description'] = name - else: + + if description is not None: # description can be explicit - out['description'] = description - has_contents = False - # put all generated data into a dict. + yield 'description', 'meta', description + + # transform the yielded data, and add type annotations for data in fn(*args, **kw): - has_contents = True # If there is a type argument, encode it as that type. if len(data) == 3: (key, value, typ) = data - out[key] = encode(value, typ) + yield key, 'data', encode(value, typ) + # TODO: add SSZ bytes as second output else: # Otherwise, try to infer the type, but keep it as-is if it's not a SSZ type or bytes. (key, value) = data if isinstance(value, (SSZValue, bytes)): - out[key] = encode(value) + yield key, 'data', encode(value) + # TODO: add SSZ bytes as second output elif isinstance(value, list) and all([isinstance(el, (SSZValue, bytes)) for el in value]): - out[key] = [encode(el) for el in value] + for i, el in enumerate(value): + yield f'{key}_{i}', 'data', encode(el) + # TODO: add SSZ bytes as second output + yield f'{key}_count', 'meta', len(value) else: # not a ssz value. # It could be vector or bytes still, but it is a rare case, # and lists can't be inferred fully (generics lose element type). # In such cases, explicitly state the type of the yielded value as a third yielded object. - out[key] = value - if has_contents: - return out - else: - return None + # The data will now just be yielded as any python data, + # something that should be encodeable by the generator runner. 
+ yield key, 'data', value else: - # just complete the function, ignore all yielded data, we are not using it + # Just complete the function, ignore all yielded data, + # we are not using it (or processing it, i.e. nearly zero efficiency loss) + # Pytest does not support yielded data in the outer function, so we need to wrap it like this. for _ in fn(*args, **kw): continue return None + return entry + return runner -def with_tags(tags: Dict[str, Any]): +def with_meta_tags(tags: Dict[str, Any]): """ - Decorator factory, adds tags (key, value) pairs to the output of the function. + Decorator factory, yields meta tags (key, value) pairs to the output of the function. Useful to build test-vector annotations with. - This decorator is applied after the ``spectest`` decorator is applied. :param tags: dict of tags :return: Decorator. """ def runner(fn): def entry(*args, **kw): - fn_out = fn(*args, **kw) - # do not add tags if the function is not returning a dict at all (i.e. not in generator mode) - if fn_out is None: - return None - return {**tags, **fn_out} + yielded_any = False + for part in fn(*args, **kw): + yield part + yielded_any = True + # Do not add tags if the function is not returning a dict at all (i.e. not in generator mode). + # As a pytest, we do not want to be yielding anything (unsupported by pytest) + if yielded_any: + for k, v in tags: + yield k, 'meta', v return entry return runner - -def with_args(create_args: Callable[[], Iterable[Any]]): - """ - Decorator factory, adds given extra arguments to the decorated function. - :param create_args: function to create arguments with. - :return: Decorator. - """ - def runner(fn): - # this wraps the function, to hide that the function actually yielding data. - def entry(*args, **kw): - return fn(*(list(create_args()) + list(args)), **kw) - return entry - return runner From e8b3f9985b0e4c50d6ecb38a91c7a6c69567f442 Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 26 Jul 2019 22:40:49 +0200 Subject: [PATCH 021/130] update testgen, make epoch proc work --- test_generators/epoch_processing/main.py | 25 +++++++++---------- .../config_helpers/preset_loader/loader.py | 4 +-- test_libs/gen_helpers/gen_base/gen_runner.py | 2 +- test_libs/gen_helpers/gen_base/gen_typing.py | 3 +-- test_libs/gen_helpers/gen_from_tests/gen.py | 4 ++- test_libs/pyspec/eth2spec/test/context.py | 6 ++--- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/test_generators/epoch_processing/main.py b/test_generators/epoch_processing/main.py index da41c9e95..9a6f46ae8 100644 --- a/test_generators/epoch_processing/main.py +++ b/test_generators/epoch_processing/main.py @@ -9,13 +9,12 @@ from eth2spec.test.phase_0.epoch_processing import ( test_process_registry_updates, test_process_slashings ) -from gen_base import gen_runner, gen_suite, gen_typing +from gen_base import gen_runner, gen_typing from gen_from_tests.gen import generate_from_tests from preset_loader import loader -def create_suite(handler_name: str, tests_src, config_name: str) \ - -> Callable[[str], gen_typing.TestProvider]: +def create_provider(handler_name: str, tests_src, config_name: str) -> gen_typing.TestProvider: def prepare_fn(configs_path: str) -> str: presets = loader.load_presets(configs_path, config_name) @@ -36,14 +35,14 @@ def create_suite(handler_name: str, tests_src, config_name: str) \ if __name__ == "__main__": gen_runner.run_generator("epoch_processing", [ - create_suite('crosslinks', test_process_crosslinks, 'minimal'), - create_suite('crosslinks', test_process_crosslinks, 
'mainnet'), - create_suite('final_updates', test_process_final_updates, 'minimal'), - create_suite('final_updates', test_process_final_updates, 'mainnet'), - create_suite('justification_and_finalization', test_process_justification_and_finalization, 'minimal'), - create_suite('justification_and_finalization', test_process_justification_and_finalization, 'mainnet'), - create_suite('registry_updates', test_process_registry_updates, 'minimal'), - create_suite('registry_updates', test_process_registry_updates, 'mainnet'), - create_suite('slashings', test_process_slashings, 'minimal'), - create_suite('slashings', test_process_slashings, 'mainnet'), + create_provider('crosslinks', test_process_crosslinks, 'minimal'), + create_provider('crosslinks', test_process_crosslinks, 'mainnet'), + create_provider('final_updates', test_process_final_updates, 'minimal'), + create_provider('final_updates', test_process_final_updates, 'mainnet'), + create_provider('justification_and_finalization', test_process_justification_and_finalization, 'minimal'), + create_provider('justification_and_finalization', test_process_justification_and_finalization, 'mainnet'), + create_provider('registry_updates', test_process_registry_updates, 'minimal'), + create_provider('registry_updates', test_process_registry_updates, 'mainnet'), + create_provider('slashings', test_process_slashings, 'minimal'), + create_provider('slashings', test_process_slashings, 'mainnet'), ]) diff --git a/test_libs/config_helpers/preset_loader/loader.py b/test_libs/config_helpers/preset_loader/loader.py index f37aca393..9d75932df 100644 --- a/test_libs/config_helpers/preset_loader/loader.py +++ b/test_libs/config_helpers/preset_loader/loader.py @@ -10,10 +10,10 @@ from os.path import join def load_presets(configs_dir, presets_name) -> Dict[str, Any]: """ Loads the given preset - :param presets_name: The name of the generator. (lowercase snake_case) + :param presets_name: The name of the presets. 
(lowercase snake_case) :return: Dictionary, mapping of constant-name -> constant-value """ - path = Path(join(configs_dir, 'constant_presets', presets_name+'.yaml')) + path = Path(join(configs_dir, presets_name+'.yaml')) yaml = YAML(typ='base') loaded = yaml.load(path) out = dict() diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py b/test_libs/gen_helpers/gen_base/gen_runner.py index 9a2d26664..f0867db7e 100644 --- a/test_libs/gen_helpers/gen_base/gen_runner.py +++ b/test_libs/gen_helpers/gen_base/gen_runner.py @@ -1,7 +1,7 @@ import argparse from pathlib import Path import sys -from typing import List +from typing import Iterable from ruamel.yaml import ( YAML, diff --git a/test_libs/gen_helpers/gen_base/gen_typing.py b/test_libs/gen_helpers/gen_base/gen_typing.py index 97ddfa713..34bd71db1 100644 --- a/test_libs/gen_helpers/gen_base/gen_typing.py +++ b/test_libs/gen_helpers/gen_base/gen_typing.py @@ -3,10 +3,9 @@ from typing import ( Callable, Iterable, NewType, - Dict, Tuple, ) -from collections import namedtuple +from dataclasses import dataclass # Elements: name, out_kind, data # diff --git a/test_libs/gen_helpers/gen_from_tests/gen.py b/test_libs/gen_helpers/gen_from_tests/gen.py index 22496de6b..902b0954a 100644 --- a/test_libs/gen_helpers/gen_from_tests/gen.py +++ b/test_libs/gen_helpers/gen_from_tests/gen.py @@ -1,4 +1,5 @@ from inspect import getmembers, isfunction +from typing import Any, Iterable from gen_base.gen_typing import TestCase @@ -34,5 +35,6 @@ def generate_from_tests(runner_name: str, handler_name: str, src: Any, handler_name=handler_name, suite_name='pyspec_tests', case_name=case_name, - case_fn=lambda: tfn(generator_mode=True, fork_name=fork_name, bls_active=bls_active) + # TODO: with_all_phases and other per-phase tooling, should be replaced with per-fork equivalent. + case_fn=lambda: tfn(generator_mode=True, phase=fork_name, bls_active=bls_active) ) diff --git a/test_libs/pyspec/eth2spec/test/context.py b/test_libs/pyspec/eth2spec/test/context.py index 2adb76da0..71d38dcf1 100644 --- a/test_libs/pyspec/eth2spec/test/context.py +++ b/test_libs/pyspec/eth2spec/test/context.py @@ -4,7 +4,7 @@ from eth2spec.utils import bls from .helpers.genesis import create_genesis_state -from .utils import spectest, with_tags +from .utils import spectest, with_meta_tags def with_state(fn): @@ -53,7 +53,7 @@ def expect_assertion_error(fn): # Tags a test to be ignoring BLS for it to pass. -bls_ignored = with_tags({'bls_setting': 2}) +bls_ignored = with_meta_tags({'bls_setting': 2}) def never_bls(fn): @@ -68,7 +68,7 @@ def never_bls(fn): # Tags a test to be requiring BLS for it to pass. -bls_required = with_tags({'bls_setting': 1}) +bls_required = with_meta_tags({'bls_setting': 1}) def always_bls(fn): From 8a83fce3abb3cf492cac7b4ac39c6efab643e9e0 Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 26 Jul 2019 23:50:11 +0200 Subject: [PATCH 022/130] fixes to decorator order, and make functions fully yield, with pytest compat. 
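A toy illustration of the ordering problem this commit addresses (editorial; the names `switch` and `produce` are made up): once a decorator is generator-based, the setting it overrides is only restored after the yielded data has been fully consumed, so any decorator that must observe the override has to sit inside the one that drives the iteration to completion.

```python
active = False  # stand-in for the global BLS switch


def switch(fn):
    # generator-based decorator: override a global while the wrapped function yields
    def entry(*args, **kw):
        global active
        old, active = active, kw.pop('active', True)
        yield from fn(*args, **kw)  # the override is live for the whole iteration
        active = old                # restored only once iteration completes
    return entry


@switch
def produce():
    yield 'flag', active


print(list(produce()))  # [('flag', True)]  -- override visible mid-iteration
print(active)           # False             -- and reset afterwards
```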
---
 test_libs/pyspec/eth2spec/test/context.py     | 39 ++++++++++---------
 .../test/genesis/test_initialization.py       |  4 +-
 .../eth2spec/test/genesis/test_validity.py    | 14 +++----
 .../test_process_attestation.py               |  2 +-
 .../test_process_attester_slashing.py         | 18 ++++-----
 .../test_process_block_header.py              |  2 +-
 .../block_processing/test_process_deposit.py  |  4 +-
 .../test_process_proposer_slashing.py         |  6 +--
 .../block_processing/test_process_transfer.py |  2 +-
 .../test_process_voluntary_exit.py            |  2 +-
 ...est_process_early_derived_secret_reveal.py | 16 ++++----
 .../pyspec/eth2spec/test/test_finality.py     | 10 ++---
 test_libs/pyspec/eth2spec/test/utils.py       | 23 +++++++----
 13 files changed, 75 insertions(+), 67 deletions(-)

diff --git a/test_libs/pyspec/eth2spec/test/context.py b/test_libs/pyspec/eth2spec/test/context.py
index 71d38dcf1..5a0ddb59d 100644
--- a/test_libs/pyspec/eth2spec/test/context.py
+++ b/test_libs/pyspec/eth2spec/test/context.py
@@ -4,7 +4,7 @@ from eth2spec.utils import bls
 
 from .helpers.genesis import create_genesis_state
 
-from .utils import spectest, with_meta_tags
+from .utils import vector_test, with_meta_tags
 
 
 def with_state(fn):
@@ -12,7 +12,7 @@ def with_state(fn):
         try:
             kw['state'] = create_genesis_state(spec=kw['spec'], num_validators=spec_phase0.SLOTS_PER_EPOCH * 8)
         except KeyError:
-            raise TypeError('Spec decorator must come before state decorator to inject spec into state.')
+            raise TypeError('Spec decorator must come within state decorator to inject spec into state.')
         return fn(*args, **kw)
     return entry
 
@@ -27,15 +27,18 @@ def with_state(fn):
 DEFAULT_BLS_ACTIVE = False
 
 
-def spectest_with_bls_switch(fn):
+def spec_test(fn):
+    # Bls switch must be wrapped by vector_test,
     # to fully go through the yielded bls switch data, before setting back the BLS setting.
+    # A test may apply BLS overrides such as @always_bls,
+    # but if it yields data (n.b. @always_bls yields the bls setting), it should be wrapped by this decorator.
+    # This is why @always_bls has its own bls switch, since the override is beyond the reach of the outer switch.
-    return spectest()(bls_switch(fn))
+    return vector_test()(bls_switch(fn))
 
 
-# shorthand for decorating @with_state @spectest()
+# shorthand for decorating @spectest() @with_state
 def spec_state_test(fn):
-    return with_state(spectest_with_bls_switch(fn))
+    return spec_test(with_state(fn))
 
 
 def expect_assertion_error(fn):
@@ -52,40 +55,38 @@ def expect_assertion_error(fn):
         raise AssertionError('expected an assertion error, but got none.')
 
 
-# Tags a test to be ignoring BLS for it to pass.
-bls_ignored = with_meta_tags({'bls_setting': 2})
-
-
 def never_bls(fn):
     """
     Decorator to apply on ``bls_switch`` decorator to force BLS de-activation. Useful to mark tests as BLS-ignorant.
+    This decorator may only be applied to yielding spec test functions, and should be wrapped by vector_test,
+    as the yielding needs to complete before setting back the BLS setting.
     """
     def entry(*args, **kw):
         # override bls setting
         kw['bls_active'] = False
-        return fn(*args, **kw)
-    return bls_ignored(entry)
-
-
-# Tags a test to be requiring BLS for it to pass.
-bls_required = with_meta_tags({'bls_setting': 1})
+        return bls_switch(fn)(*args, **kw)
+    return with_meta_tags({'bls_setting': 2})(entry)
 
 
 def always_bls(fn):
     """
     Decorator to apply on ``bls_switch`` decorator to force BLS activation. Useful to mark tests as BLS-dependent.
+ This decorator may only be applied to yielding spec test functions, and should be wrapped by vector_test, + as the yielding needs to complete before setting back the BLS setting. """ def entry(*args, **kw): # override bls setting kw['bls_active'] = True - return fn(*args, **kw) - return bls_required(entry) + return bls_switch(fn)(*args, **kw) + return with_meta_tags({'bls_setting': 1})(entry) def bls_switch(fn): """ Decorator to make a function execute with BLS ON, or BLS off. Based on an optional bool argument ``bls_active``, passed to the function at runtime. + This decorator may only be applied to yielding spec test functions, and should be wrapped by vector_test, + as the yielding needs to complete before setting back the BLS setting. """ def entry(*args, **kw): old_state = bls.bls_active diff --git a/test_libs/pyspec/eth2spec/test/genesis/test_initialization.py b/test_libs/pyspec/eth2spec/test/genesis/test_initialization.py index b95b70fef..2ff57be74 100644 --- a/test_libs/pyspec/eth2spec/test/genesis/test_initialization.py +++ b/test_libs/pyspec/eth2spec/test/genesis/test_initialization.py @@ -1,11 +1,11 @@ -from eth2spec.test.context import spectest_with_bls_switch, with_phases +from eth2spec.test.context import spec_test, with_phases from eth2spec.test.helpers.deposits import ( prepare_genesis_deposits, ) @with_phases(['phase0']) -@spectest_with_bls_switch +@spec_test def test_initialize_beacon_state_from_eth1(spec): deposit_count = spec.MIN_GENESIS_ACTIVE_VALIDATOR_COUNT deposits, deposit_root = prepare_genesis_deposits(spec, deposit_count, spec.MAX_EFFECTIVE_BALANCE, signed=True) diff --git a/test_libs/pyspec/eth2spec/test/genesis/test_validity.py b/test_libs/pyspec/eth2spec/test/genesis/test_validity.py index bb95bb2b0..07ad3a73c 100644 --- a/test_libs/pyspec/eth2spec/test/genesis/test_validity.py +++ b/test_libs/pyspec/eth2spec/test/genesis/test_validity.py @@ -1,4 +1,4 @@ -from eth2spec.test.context import spectest_with_bls_switch, with_phases +from eth2spec.test.context import spec_test, with_phases from eth2spec.test.helpers.deposits import ( prepare_genesis_deposits, ) @@ -26,7 +26,7 @@ def run_is_valid_genesis_state(spec, state, valid=True): @with_phases(['phase0']) -@spectest_with_bls_switch +@spec_test def test_is_valid_genesis_state_true(spec): state = create_valid_beacon_state(spec) @@ -34,7 +34,7 @@ def test_is_valid_genesis_state_true(spec): @with_phases(['phase0']) -@spectest_with_bls_switch +@spec_test def test_is_valid_genesis_state_false_invalid_timestamp(spec): state = create_valid_beacon_state(spec) state.genesis_time = spec.MIN_GENESIS_TIME - 1 @@ -43,7 +43,7 @@ def test_is_valid_genesis_state_false_invalid_timestamp(spec): @with_phases(['phase0']) -@spectest_with_bls_switch +@spec_test def test_is_valid_genesis_state_true_more_balance(spec): state = create_valid_beacon_state(spec) state.validators[0].effective_balance = spec.MAX_EFFECTIVE_BALANCE + 1 @@ -53,7 +53,7 @@ def test_is_valid_genesis_state_true_more_balance(spec): # TODO: not part of the genesis function yet. Erroneously merged. 
# @with_phases(['phase0']) -# @spectest_with_bls_switch +# @spec_test # def test_is_valid_genesis_state_false_not_enough_balance(spec): # state = create_valid_beacon_state(spec) # state.validators[0].effective_balance = spec.MAX_EFFECTIVE_BALANCE - 1 @@ -62,7 +62,7 @@ def test_is_valid_genesis_state_true_more_balance(spec): @with_phases(['phase0']) -@spectest_with_bls_switch +@spec_test def test_is_valid_genesis_state_true_one_more_validator(spec): deposit_count = spec.MIN_GENESIS_ACTIVE_VALIDATOR_COUNT + 1 deposits, _ = prepare_genesis_deposits(spec, deposit_count, spec.MAX_EFFECTIVE_BALANCE, signed=True) @@ -75,7 +75,7 @@ def test_is_valid_genesis_state_true_one_more_validator(spec): @with_phases(['phase0']) -@spectest_with_bls_switch +@spec_test def test_is_valid_genesis_state_false_not_enough_validator(spec): deposit_count = spec.MIN_GENESIS_ACTIVE_VALIDATOR_COUNT - 1 deposits, _ = prepare_genesis_deposits(spec, deposit_count, spec.MAX_EFFECTIVE_BALANCE, signed=True) diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py index ab46a0d8c..ee1c1b397 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py @@ -116,8 +116,8 @@ def test_wrong_end_epoch_with_max_epochs_per_crosslink(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_attestation_signature(spec, state): attestation = get_valid_attestation(spec, state) state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attester_slashing.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attester_slashing.py index 7a6030157..20a510648 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attester_slashing.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attester_slashing.py @@ -108,8 +108,8 @@ def test_success_surround(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_success_already_exited_recent(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=True, signed_2=True) slashed_indices = ( @@ -123,8 +123,8 @@ def test_success_already_exited_recent(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_success_already_exited_long_ago(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=True, signed_2=True) slashed_indices = ( @@ -139,24 +139,24 @@ def test_success_already_exited_long_ago(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_1(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=False, signed_2=True) yield from run_attester_slashing_processing(spec, state, attester_slashing, False) @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_2(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=True, signed_2=False) yield from run_attester_slashing_processing(spec, state, attester_slashing, False) @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_1_and_2(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=False, signed_2=False) yield from 
run_attester_slashing_processing(spec, state, attester_slashing, False) @@ -212,9 +212,9 @@ def test_custody_bit_0_and_1_intersect(spec, state): yield from run_attester_slashing_processing(spec, state, attester_slashing, False) -@always_bls @with_all_phases @spec_state_test +@always_bls def test_att1_bad_extra_index(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=True, signed_2=True) @@ -228,9 +228,9 @@ def test_att1_bad_extra_index(spec, state): yield from run_attester_slashing_processing(spec, state, attester_slashing, False) -@always_bls @with_all_phases @spec_state_test +@always_bls def test_att1_bad_replaced_index(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=True, signed_2=True) @@ -244,9 +244,9 @@ def test_att1_bad_replaced_index(spec, state): yield from run_attester_slashing_processing(spec, state, attester_slashing, False) -@always_bls @with_all_phases @spec_state_test +@always_bls def test_att2_bad_extra_index(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=True, signed_2=True) @@ -260,9 +260,9 @@ def test_att2_bad_extra_index(spec, state): yield from run_attester_slashing_processing(spec, state, attester_slashing, False) -@always_bls @with_all_phases @spec_state_test +@always_bls def test_att2_bad_replaced_index(spec, state): attester_slashing = get_valid_attester_slashing(spec, state, signed_1=True, signed_2=True) diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_block_header.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_block_header.py index a2306ef4d..c790c612c 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_block_header.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_block_header.py @@ -42,8 +42,8 @@ def test_success_block_header(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_block_header(spec, state): block = build_empty_block_for_next_slot(spec, state) yield from run_block_header_processing(spec, state, block, valid=False) diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_deposit.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_deposit.py index 3dbbeedf0..d1ffbd7c9 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_deposit.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_deposit.py @@ -94,8 +94,8 @@ def test_new_deposit_over_max(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_new_deposit(spec, state): # fresh deposit = next validator index = validator appended to registry validator_index = len(state.validators) @@ -115,8 +115,8 @@ def test_success_top_up(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_top_up(spec, state): validator_index = 0 amount = spec.MAX_EFFECTIVE_BALANCE // 4 diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_proposer_slashing.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_proposer_slashing.py index af34ea709..5eaec9f03 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_proposer_slashing.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_proposer_slashing.py @@ -49,24 +49,24 @@ def test_success(spec, state): @with_all_phases -@always_bls 
@spec_state_test +@always_bls def test_invalid_sig_1(spec, state): proposer_slashing = get_valid_proposer_slashing(spec, state, signed_1=False, signed_2=True) yield from run_proposer_slashing_processing(spec, state, proposer_slashing, False) @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_2(spec, state): proposer_slashing = get_valid_proposer_slashing(spec, state, signed_1=True, signed_2=False) yield from run_proposer_slashing_processing(spec, state, proposer_slashing, False) @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_sig_1_and_2(spec, state): proposer_slashing = get_valid_proposer_slashing(spec, state, signed_1=False, signed_2=False) yield from run_proposer_slashing_processing(spec, state, proposer_slashing, False) diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_transfer.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_transfer.py index f079ff578..1b839562e 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_transfer.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_transfer.py @@ -81,8 +81,8 @@ def test_success_active_above_max_effective_fee(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_signature(spec, state): transfer = get_valid_transfer(spec, state) # un-activate so validator can transfer diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_voluntary_exit.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_voluntary_exit.py index 6c9298ecc..155f70621 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_voluntary_exit.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_voluntary_exit.py @@ -47,8 +47,8 @@ def test_success(spec, state): @with_all_phases -@always_bls @spec_state_test +@always_bls def test_invalid_signature(spec, state): # move state forward PERSISTENT_COMMITTEE_PERIOD epochs to allow for exit state.slot += spec.PERSISTENT_COMMITTEE_PERIOD * spec.SLOTS_PER_EPOCH diff --git a/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py index 831ad35a5..3c7434dfc 100644 --- a/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py +++ b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py @@ -42,8 +42,8 @@ def run_early_derived_secret_reveal_processing(spec, state, randao_key_reveal, v @with_all_phases_except(['phase0']) -@always_bls @spec_state_test +@always_bls def test_success(spec, state): randao_key_reveal = get_valid_early_derived_secret_reveal(spec, state) @@ -51,8 +51,8 @@ def test_success(spec, state): @with_all_phases_except(['phase0']) -@never_bls @spec_state_test +@never_bls def test_reveal_from_current_epoch(spec, state): randao_key_reveal = get_valid_early_derived_secret_reveal(spec, state, spec.get_current_epoch(state)) @@ -60,8 +60,8 @@ def test_reveal_from_current_epoch(spec, state): @with_all_phases_except(['phase0']) -@never_bls @spec_state_test +@never_bls def test_reveal_from_past_epoch(spec, state): next_epoch(spec, state) apply_empty_block(spec, state) @@ -71,8 +71,8 @@ def test_reveal_from_past_epoch(spec, state): @with_all_phases_except(['phase0']) -@always_bls @spec_state_test +@always_bls def 
test_reveal_with_custody_padding(spec, state): randao_key_reveal = get_valid_early_derived_secret_reveal( spec, @@ -83,8 +83,8 @@ def test_reveal_with_custody_padding(spec, state): @with_all_phases_except(['phase0']) -@always_bls @spec_state_test +@always_bls def test_reveal_with_custody_padding_minus_one(spec, state): randao_key_reveal = get_valid_early_derived_secret_reveal( spec, @@ -95,8 +95,8 @@ def test_reveal_with_custody_padding_minus_one(spec, state): @with_all_phases_except(['phase0']) -@never_bls @spec_state_test +@never_bls def test_double_reveal(spec, state): randao_key_reveal1 = get_valid_early_derived_secret_reveal( spec, @@ -120,8 +120,8 @@ def test_double_reveal(spec, state): @with_all_phases_except(['phase0']) -@never_bls @spec_state_test +@never_bls def test_revealer_is_slashed(spec, state): randao_key_reveal = get_valid_early_derived_secret_reveal(spec, state, spec.get_current_epoch(state)) state.validators[randao_key_reveal.revealed_index].slashed = True @@ -130,8 +130,8 @@ def test_revealer_is_slashed(spec, state): @with_all_phases_except(['phase0']) -@never_bls @spec_state_test +@never_bls def test_far_future_epoch(spec, state): randao_key_reveal = get_valid_early_derived_secret_reveal( spec, diff --git a/test_libs/pyspec/eth2spec/test/test_finality.py b/test_libs/pyspec/eth2spec/test/test_finality.py index 6250a685d..8ae50d436 100644 --- a/test_libs/pyspec/eth2spec/test/test_finality.py +++ b/test_libs/pyspec/eth2spec/test/test_finality.py @@ -29,8 +29,8 @@ def check_finality(spec, @with_all_phases -@never_bls @spec_state_test +@never_bls def test_finality_no_updates_at_genesis(spec, state): assert spec.get_current_epoch(state) == spec.GENESIS_EPOCH @@ -53,8 +53,8 @@ def test_finality_no_updates_at_genesis(spec, state): @with_all_phases -@never_bls @spec_state_test +@never_bls def test_finality_rule_4(spec, state): # get past first two epochs that finality does not run on next_epoch(spec, state) @@ -81,8 +81,8 @@ def test_finality_rule_4(spec, state): @with_all_phases -@never_bls @spec_state_test +@never_bls def test_finality_rule_1(spec, state): # get past first two epochs that finality does not run on next_epoch(spec, state) @@ -111,8 +111,8 @@ def test_finality_rule_1(spec, state): @with_all_phases -@never_bls @spec_state_test +@never_bls def test_finality_rule_2(spec, state): # get past first two epochs that finality does not run on next_epoch(spec, state) @@ -143,8 +143,8 @@ def test_finality_rule_2(spec, state): @with_all_phases -@never_bls @spec_state_test +@never_bls def test_finality_rule_3(spec, state): """ Test scenario described here diff --git a/test_libs/pyspec/eth2spec/test/utils.py b/test_libs/pyspec/eth2spec/test/utils.py index 4ecabb114..59289db59 100644 --- a/test_libs/pyspec/eth2spec/test/utils.py +++ b/test_libs/pyspec/eth2spec/test/utils.py @@ -3,10 +3,13 @@ from eth2spec.debug.encode import encode from eth2spec.utils.ssz.ssz_typing import SSZValue -def spectest(description: str = None): +def vector_test(description: str = None): """ - Spectest decorator, should always be the most outer decorator around functions that yield data. - to deal with silent iteration through yielding function when in a pytest context (i.e. not in generator mode). + vector_test decorator: Allow a caller to pass "generator_mode=True" to make the test yield data, + but behave like a normal test (ignoring the yield, but fully processing) a test when not in "generator_mode" + This should always be the most outer decorator around functions that yield data. 
+ This is to deal with silent iteration through yielding function when in a pytest + context (i.e. not in generator mode). :param description: Optional description for the test to add to the metadata. :return: Decorator. """ @@ -17,10 +20,8 @@ def spectest(description: str = None): # - "ssz": raw SSZ bytes # - "data": a python structure to be encoded by the user. def entry(*args, **kw): - # check generator mode, may be None/else. - # "pop" removes it, so it is not passed to the inner function. - if kw.pop('generator_mode', False) is True: + def generator_mode(): if description is not None: # description can be explicit yield 'description', 'meta', description @@ -51,6 +52,13 @@ def spectest(description: str = None): # The data will now just be yielded as any python data, # something that should be encodeable by the generator runner. yield key, 'data', value + + # check generator mode, may be None/else. + # "pop" removes it, so it is not passed to the inner function. + if kw.pop('generator_mode', False) is True: + # return the yielding function as a generator object. + # Don't yield in this function itself, that would make pytest skip over it. + return generator_mode() else: # Just complete the function, ignore all yielded data, # we are not using it (or processing it, i.e. nearly zero efficiency loss) @@ -80,8 +88,7 @@ def with_meta_tags(tags: Dict[str, Any]): # Do not add tags if the function is not returning a dict at all (i.e. not in generator mode). # As a pytest, we do not want to be yielding anything (unsupported by pytest) if yielded_any: - for k, v in tags: + for k, v in tags.items(): yield k, 'meta', v return entry return runner - From d7728e60c98071c1df5828bcddc7644c8dcae69c Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 00:26:05 +0200 Subject: [PATCH 023/130] handle meta tags passed from inner testgen decorator --- test_generators/epoch_processing/main.py | 2 +- test_generators/operations/main.py | 58 +++++++++++------------ test_libs/pyspec/eth2spec/debug/encode.py | 2 +- test_libs/pyspec/eth2spec/test/utils.py | 40 +++++++--------- 4 files changed, 49 insertions(+), 53 deletions(-) diff --git a/test_generators/epoch_processing/main.py b/test_generators/epoch_processing/main.py index 9a6f46ae8..f0505ee94 100644 --- a/test_generators/epoch_processing/main.py +++ b/test_generators/epoch_processing/main.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Iterable from eth2spec.phase0 import spec as spec_phase0 from eth2spec.phase1 import spec as spec_phase1 diff --git a/test_generators/operations/main.py b/test_generators/operations/main.py index b61e98526..995a626b4 100644 --- a/test_generators/operations/main.py +++ b/test_generators/operations/main.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Iterable from eth2spec.test.phase_0.block_processing import ( test_process_attestation, @@ -10,48 +10,48 @@ from eth2spec.test.phase_0.block_processing import ( test_process_voluntary_exit, ) -from gen_base import gen_runner, gen_suite, gen_typing +from gen_base import gen_runner, gen_typing from gen_from_tests.gen import generate_from_tests from preset_loader import loader from eth2spec.phase0 import spec as spec_phase0 from eth2spec.phase1 import spec as spec_phase1 -def create_suite(operation_name: str, config_name: str, get_cases: Callable[[], Iterable[gen_typing.TestCase]]) \ - -> Callable[[str], gen_typing.TestSuiteOutput]: - def suite_definition(configs_path: str) -> gen_typing.TestSuiteOutput: +def 
create_provider(handler_name: str, tests_src, config_name: str) -> gen_typing.TestProvider: + + def prepare_fn(configs_path: str) -> str: presets = loader.load_presets(configs_path, config_name) spec_phase0.apply_constants_preset(presets) spec_phase1.apply_constants_preset(presets) + return config_name - return ("%s_%s" % (operation_name, config_name), operation_name, gen_suite.render_suite( - title="%s operation" % operation_name, - summary="Test suite for %s type operation processing" % operation_name, - forks_timeline="testing", - forks=["phase0"], - config=config_name, - runner="operations", - handler=operation_name, - test_cases=get_cases())) - return suite_definition + def cases_fn() -> Iterable[gen_typing.TestCase]: + return generate_from_tests( + runner_name='operations', + handler_name=handler_name, + src=tests_src, + fork_name='phase0' + ) + + return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn) if __name__ == "__main__": gen_runner.run_generator("operations", [ - create_suite('attestation', 'minimal', lambda: generate_from_tests(test_process_attestation, 'phase0')), - create_suite('attestation', 'mainnet', lambda: generate_from_tests(test_process_attestation, 'phase0')), - create_suite('attester_slashing', 'minimal', lambda: generate_from_tests(test_process_attester_slashing, 'phase0')), - create_suite('attester_slashing', 'mainnet', lambda: generate_from_tests(test_process_attester_slashing, 'phase0')), - create_suite('block_header', 'minimal', lambda: generate_from_tests(test_process_block_header, 'phase0')), - create_suite('block_header', 'mainnet', lambda: generate_from_tests(test_process_block_header, 'phase0')), - create_suite('deposit', 'minimal', lambda: generate_from_tests(test_process_deposit, 'phase0')), - create_suite('deposit', 'mainnet', lambda: generate_from_tests(test_process_deposit, 'phase0')), - create_suite('proposer_slashing', 'minimal', lambda: generate_from_tests(test_process_proposer_slashing, 'phase0')), - create_suite('proposer_slashing', 'mainnet', lambda: generate_from_tests(test_process_proposer_slashing, 'phase0')), - create_suite('transfer', 'minimal', lambda: generate_from_tests(test_process_transfer, 'phase0')), + create_provider('attestation', test_process_attestation, 'minimal'), + create_provider('attestation', test_process_attestation, 'mainnet'), + create_provider('attester_slashing', test_process_attester_slashing, 'minimal'), + create_provider('attester_slashing', test_process_attester_slashing, 'mainnet'), + create_provider('block_header', test_process_block_header, 'minimal'), + create_provider('block_header', test_process_block_header, 'mainnet'), + create_provider('deposit', test_process_deposit, 'minimal'), + create_provider('deposit', test_process_deposit, 'mainnet'), + create_provider('proposer_slashing', test_process_proposer_slashing, 'minimal'), + create_provider('proposer_slashing', test_process_proposer_slashing, 'mainnet'), + create_provider('transfer', test_process_transfer, 'minimal'), # Disabled, due to the high amount of different transfer tests, this produces a shocking size of tests. # Unnecessarily, as transfer are disabled currently, so not a priority. 
- # create_suite('transfer', 'mainnet', lambda: generate_from_tests(test_process_transfer, 'phase0')), - create_suite('voluntary_exit', 'minimal', lambda: generate_from_tests(test_process_voluntary_exit, 'phase0')), - create_suite('voluntary_exit', 'mainnet', lambda: generate_from_tests(test_process_voluntary_exit, 'phase0')), + # create_provider('transfer', test_process_transfer, 'mainnet'), + create_provider('voluntary_exit', test_process_voluntary_exit, 'minimal'), + create_provider('voluntary_exit', test_process_voluntary_exit, 'mainnet'), ]) diff --git a/test_libs/pyspec/eth2spec/debug/encode.py b/test_libs/pyspec/eth2spec/debug/encode.py index ac4bd9df2..d59f15640 100644 --- a/test_libs/pyspec/eth2spec/debug/encode.py +++ b/test_libs/pyspec/eth2spec/debug/encode.py @@ -29,4 +29,4 @@ def encode(value, include_hash_tree_roots=False): ret["hash_tree_root"] = '0x' + hash_tree_root(value).hex() return ret else: - raise Exception(f"Type not recognized: value={value}, typ={value.type()}") + raise Exception(f"Type not recognized: value={value}, typ={type(value)}") diff --git a/test_libs/pyspec/eth2spec/test/utils.py b/test_libs/pyspec/eth2spec/test/utils.py index 59289db59..e15c5efeb 100644 --- a/test_libs/pyspec/eth2spec/test/utils.py +++ b/test_libs/pyspec/eth2spec/test/utils.py @@ -28,30 +28,26 @@ def vector_test(description: str = None): # transform the yielded data, and add type annotations for data in fn(*args, **kw): - # If there is a type argument, encode it as that type. - if len(data) == 3: - (key, value, typ) = data - yield key, 'data', encode(value, typ) + # if not 2 items, then it is assumed to be already formatted with a type: + # e.g. ("bls_setting", "meta", 1) + if len(data) != 2: + yield data + continue + # Try to infer the type, but keep it as-is if it's not a SSZ type or bytes. + (key, value) = data + if isinstance(value, (SSZValue, bytes)): + yield key, 'data', encode(value) # TODO: add SSZ bytes as second output - else: - # Otherwise, try to infer the type, but keep it as-is if it's not a SSZ type or bytes. - (key, value) = data - if isinstance(value, (SSZValue, bytes)): - yield key, 'data', encode(value) + elif isinstance(value, list) and all([isinstance(el, (SSZValue, bytes)) for el in value]): + for i, el in enumerate(value): + yield f'{key}_{i}', 'data', encode(el) # TODO: add SSZ bytes as second output - elif isinstance(value, list) and all([isinstance(el, (SSZValue, bytes)) for el in value]): - for i, el in enumerate(value): - yield f'{key}_{i}', 'data', encode(el) - # TODO: add SSZ bytes as second output - yield f'{key}_count', 'meta', len(value) - else: - # not a ssz value. - # It could be vector or bytes still, but it is a rare case, - # and lists can't be inferred fully (generics lose element type). - # In such cases, explicitly state the type of the yielded value as a third yielded object. - # The data will now just be yielded as any python data, - # something that should be encodeable by the generator runner. - yield key, 'data', value + yield f'{key}_count', 'meta', len(value) + else: + # Not a ssz value. + # The data will now just be yielded as any python data, + # something that should be encodeable by the generator runner. + yield key, 'data', value # check generator mode, may be None/else. # "pop" removes it, so it is not passed to the inner function. 
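The `generator_mode` flow in the `spectest` and `vector_test` decorators above is easy to get wrong, because a Python function whose body contains a bare `yield` is itself a generator function, which pytest collects but never actually drives. A minimal standalone sketch of the pattern (illustrative only; `fn` stands for any decorated test function, and the names here are not taken from the patches):

```python
def runner(fn):
    def entry(*args, **kw):
        def generator_mode():
            # All yielding happens in this inner closure.
            yield from fn(*args, **kw)

        # "pop" removes the flag so it is not passed to the inner function.
        if kw.pop('generator_mode', False) is True:
            # Return the generator object. A bare `yield` directly inside
            # `entry` would turn `entry` itself into a generator function,
            # and pytest would silently skip the test body.
            return generator_mode()
        # pytest path: run the test, drain and discard the yielded data.
        for _ in fn(*args, **kw):
            continue
    return entry
```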
From 77484c33ec237d64692e73bd96126c5e8554fcaf Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 00:28:47 +0200 Subject: [PATCH 024/130] make sure new config loader change is working --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a43430f77..b612378e2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -35,13 +35,13 @@ commands: description: "Restore the cache with pyspec keys" steps: - restore_cached_venv: - venv_name: v3-pyspec-bump2 + venv_name: v4-pyspec reqs_checksum: cache-{{ checksum "test_libs/pyspec/requirements.txt" }}-{{ checksum "test_libs/pyspec/requirements-testing.txt" }} save_pyspec_cached_venv: description: Save a venv into a cache with pyspec keys" steps: - save_cached_venv: - venv_name: v3-pyspec-bump2 + venv_name: v4-pyspec reqs_checksum: cache-{{ checksum "test_libs/pyspec/requirements.txt" }}-{{ checksum "test_libs/pyspec/requirements-testing.txt" }} venv_path: ./test_libs/pyspec/venv restore_deposit_contract_cached_venv: From 62c917a2a9667e3c9d2654bfa98b51d86716b72e Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 02:22:19 +0200 Subject: [PATCH 025/130] update shuffling test gen --- test_generators/shuffling/main.py | 67 ++++++++++++++----------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/test_generators/shuffling/main.py b/test_generators/shuffling/main.py index adfab8cfb..6425c708a 100644 --- a/test_generators/shuffling/main.py +++ b/test_generators/shuffling/main.py @@ -1,54 +1,49 @@ from eth2spec.phase0 import spec as spec -from eth_utils import ( - to_dict, to_tuple -) -from gen_base import gen_runner, gen_suite, gen_typing +from eth_utils import to_tuple +from gen_base import gen_runner, gen_typing from preset_loader import loader +from typing import Iterable + + +def shuffling_case_fn(seed, count): + yield 'mapping', 'data', { + 'seed': '0x' + seed.hex(), + 'count': count, + 'mapping': [int(spec.compute_shuffled_index(i, count, seed)) for i in range(count)] + } -@to_dict def shuffling_case(seed, count): - yield 'seed', '0x' + seed.hex() - yield 'count', count - yield 'shuffled', [int(spec.compute_shuffled_index(i, count, seed)) for i in range(count)] + return f'shuffle_0x{seed.hex()}_{count}', lambda: shuffling_case_fn(seed, count) @to_tuple def shuffling_test_cases(): - for seed in [spec.hash(spec.int_to_bytes(seed_init_value, length=4)) for seed_init_value in range(30)]: - for count in [0, 1, 2, 3, 5, 10, 33, 100, 1000]: + for seed in [spec.hash(seed_init_value.to_bytes(length=4, byteorder='little')) for seed_init_value in range(30)]: + for count in [0, 1, 2, 3, 5, 10, 33, 100, 1000, 9999]: yield shuffling_case(seed, count) -def mini_shuffling_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - presets = loader.load_presets(configs_path, 'minimal') - spec.apply_constants_preset(presets) +def create_provider(config_name: str) -> gen_typing.TestProvider: - return ("shuffling_minimal", "core", gen_suite.render_suite( - title="Swap-or-Not Shuffling tests with minimal config", - summary="Swap or not shuffling, with minimally configured testing round-count", - forks_timeline="testing", - forks=["phase0"], - config="minimal", - runner="shuffling", - handler="core", - test_cases=shuffling_test_cases())) + def prepare_fn(configs_path: str) -> str: + presets = loader.load_presets(configs_path, config_name) + spec.apply_constants_preset(presets) + return config_name + def cases_fn() -> 
Iterable[gen_typing.TestCase]: + for (case_name, case_fn) in shuffling_test_cases(): + yield gen_typing.TestCase( + fork_name='phase0', + runner_name='shuffling', + handler_name='core', + suite_name='shuffle', + case_name=case_name, + case_fn=case_fn + ) -def full_shuffling_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - presets = loader.load_presets(configs_path, 'mainnet') - spec.apply_constants_preset(presets) - - return ("shuffling_full", "core", gen_suite.render_suite( - title="Swap-or-Not Shuffling tests with mainnet config", - summary="Swap or not shuffling, with normal configured (secure) mainnet round-count", - forks_timeline="mainnet", - forks=["phase0"], - config="mainnet", - runner="shuffling", - handler="core", - test_cases=shuffling_test_cases())) + return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn) if __name__ == "__main__": - gen_runner.run_generator("shuffling", [mini_shuffling_suite, full_shuffling_suite]) + gen_runner.run_generator("shuffling", [create_provider("minimal"), create_provider("mainnet")]) From badd3251ed6d12ddbf01e38f75926ca69ca50e8c Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 03:07:37 +0200 Subject: [PATCH 026/130] update BLS suite to split test cases --- test_generators/bls/main.py | 134 +++++++++++------------------------- 1 file changed, 42 insertions(+), 92 deletions(-) diff --git a/test_generators/bls/main.py b/test_generators/bls/main.py index 2e328d1dd..914983f43 100644 --- a/test_generators/bls/main.py +++ b/test_generators/bls/main.py @@ -2,23 +2,21 @@ BLS test vectors generator """ -from typing import Tuple +from typing import Tuple, Iterable, Any, Callable, Dict from eth_utils import ( encode_hex, int_to_big_endian, - to_tuple, ) -from gen_base import gen_runner, gen_suite, gen_typing +from gen_base import gen_runner, gen_typing from py_ecc import bls - F2Q_COEFF_LEN = 48 G2_COMPRESSED_Z_LEN = 48 -def int_to_hex(n: int, byte_length: int=None) -> str: +def int_to_hex(n: int, byte_length: int = None) -> str: byte_value = int_to_big_endian(n) if byte_length: byte_value = byte_value.rjust(byte_length, b'\x00') @@ -32,6 +30,9 @@ def hex_to_int(x: str) -> int: DOMAINS = [ b'\x00\x00\x00\x00\x00\x00\x00\x00', b'\x00\x00\x00\x00\x00\x00\x00\x01', + b'\x01\x00\x00\x00\x00\x00\x00\x00', + b'\x80\x00\x00\x00\x00\x00\x00\x00', + b'\x01\x23\x45\x67\x89\xab\xcd\xef', b'\xff\xff\xff\xff\xff\xff\xff\xff' ] @@ -51,7 +52,7 @@ PRIVKEYS = [ def hash_message(msg: bytes, - domain: bytes) ->Tuple[Tuple[str, str], Tuple[str, str], Tuple[str, str]]: + domain: bytes) -> Tuple[Tuple[str, str], Tuple[str, str], Tuple[str, str]]: """ Hash message Input: @@ -82,11 +83,10 @@ def hash_message_compressed(msg: bytes, domain: bytes) -> Tuple[str, str]: return [int_to_hex(z1, G2_COMPRESSED_Z_LEN), int_to_hex(z2, G2_COMPRESSED_Z_LEN)] -@to_tuple def case01_message_hash_G2_uncompressed(): for msg in MESSAGES: for domain in DOMAINS: - yield { + yield f'uncom_g2_hash_{encode_hex(msg)}_{encode_hex(domain)}', { 'input': { 'message': encode_hex(msg), 'domain': encode_hex(domain), @@ -94,11 +94,11 @@ def case01_message_hash_G2_uncompressed(): 'output': hash_message(msg, domain) } -@to_tuple + def case02_message_hash_G2_compressed(): for msg in MESSAGES: for domain in DOMAINS: - yield { + yield f'com_g2_hash_{encode_hex(msg)}_{encode_hex(domain)}', { 'input': { 'message': encode_hex(msg), 'domain': encode_hex(domain), @@ -106,23 +106,23 @@ def case02_message_hash_G2_compressed(): 'output': hash_message_compressed(msg, domain) } -@to_tuple 
+ def case03_private_to_public_key(): pubkeys = [bls.privtopub(privkey) for privkey in PRIVKEYS] pubkeys_serial = ['0x' + pubkey.hex() for pubkey in pubkeys] for privkey, pubkey_serial in zip(PRIVKEYS, pubkeys_serial): - yield { + yield f'priv_to_pub_{int_to_hex(privkey)}', { 'input': int_to_hex(privkey), 'output': pubkey_serial, } -@to_tuple + def case04_sign_messages(): for privkey in PRIVKEYS: for message in MESSAGES: for domain in DOMAINS: sig = bls.sign(message, privkey, domain) - yield { + yield f'sign_msg_{int_to_hex(privkey)}_{encode_hex(message)}_{encode_hex(domain)}', { 'input': { 'privkey': int_to_hex(privkey), 'message': encode_hex(message), @@ -131,25 +131,25 @@ def case04_sign_messages(): 'output': encode_hex(sig) } + # TODO: case05_verify_messages: Verify messages signed in case04 # It takes too long, empty for now -@to_tuple def case06_aggregate_sigs(): for domain in DOMAINS: for message in MESSAGES: sigs = [bls.sign(message, privkey, domain) for privkey in PRIVKEYS] - yield { + yield f'agg_sigs_{encode_hex(message)}_{encode_hex(domain)}', { 'input': [encode_hex(sig) for sig in sigs], 'output': encode_hex(bls.aggregate_signatures(sigs)), } -@to_tuple + def case07_aggregate_pubkeys(): pubkeys = [bls.privtopub(privkey) for privkey in PRIVKEYS] pubkeys_serial = [encode_hex(pubkey) for pubkey in pubkeys] - yield { + yield f'agg_pub_keys', { 'input': pubkeys_serial, 'output': encode_hex(bls.aggregate_pubkeys(pubkeys)), } @@ -162,85 +162,35 @@ def case07_aggregate_pubkeys(): # Proof-of-possession -def bls_msg_hash_uncompressed_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("g2_uncompressed", "msg_hash_g2_uncompressed", gen_suite.render_suite( - title="BLS G2 Uncompressed msg hash", - summary="BLS G2 Uncompressed msg hash", - forks_timeline="mainnet", - forks=["phase0"], - config="mainnet", - runner="bls", - handler="msg_hash_uncompressed", - test_cases=case01_message_hash_G2_uncompressed())) +def create_provider(handler_name: str, + test_case_fn: Callable[[], Iterable[Tuple[str, Dict[str, Any]]]]) -> gen_typing.TestProvider: + def prepare_fn(configs_path: str) -> str: + # Nothing to load / change in spec. Maybe in future forks. Put the tests into the minimal config category. 
+ return 'minimal' -def bls_msg_hash_compressed_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("g2_compressed", "msg_hash_g2_compressed", gen_suite.render_suite( - title="BLS G2 Compressed msg hash", - summary="BLS G2 Compressed msg hash", - forks_timeline="mainnet", - forks=["phase0"], - config="mainnet", - runner="bls", - handler="msg_hash_compressed", - test_cases=case02_message_hash_G2_compressed())) + def cases_fn() -> Iterable[gen_typing.TestCase]: + for data in test_case_fn(): + print(data) + (case_name, case_content) = data + yield gen_typing.TestCase( + fork_name='phase0', + runner_name='bls', + handler_name=handler_name, + suite_name='small', + case_name=case_name, + case_fn=lambda: [('data', 'data', case_content)] + ) - - -def bls_priv_to_pub_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("priv_to_pub", "priv_to_pub", gen_suite.render_suite( - title="BLS private key to pubkey", - summary="BLS Convert private key to public key", - forks_timeline="mainnet", - forks=["phase0"], - config="mainnet", - runner="bls", - handler="priv_to_pub", - test_cases=case03_private_to_public_key())) - - -def bls_sign_msg_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("sign_msg", "sign_msg", gen_suite.render_suite( - title="BLS sign msg", - summary="BLS Sign a message", - forks_timeline="mainnet", - forks=["phase0"], - config="mainnet", - runner="bls", - handler="sign_msg", - test_cases=case04_sign_messages())) - - -def bls_aggregate_sigs_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("aggregate_sigs", "aggregate_sigs", gen_suite.render_suite( - title="BLS aggregate sigs", - summary="BLS Aggregate signatures", - forks_timeline="mainnet", - forks=["phase0"], - config="mainnet", - runner="bls", - handler="aggregate_sigs", - test_cases=case06_aggregate_sigs())) - - -def bls_aggregate_pubkeys_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("aggregate_pubkeys", "aggregate_pubkeys", gen_suite.render_suite( - title="BLS aggregate pubkeys", - summary="BLS Aggregate public keys", - forks_timeline="mainnet", - forks=["phase0"], - config="mainnet", - runner="bls", - handler="aggregate_pubkeys", - test_cases=case07_aggregate_pubkeys())) + return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn) if __name__ == "__main__": gen_runner.run_generator("bls", [ - bls_msg_hash_compressed_suite, - bls_msg_hash_uncompressed_suite, - bls_priv_to_pub_suite, - bls_sign_msg_suite, - bls_aggregate_sigs_suite, - bls_aggregate_pubkeys_suite + create_provider('msg_hash_uncompressed', case01_message_hash_G2_uncompressed), + create_provider('msg_hash_compressed', case02_message_hash_G2_compressed), + create_provider('priv_to_pub', case03_private_to_public_key), + create_provider('sign_msg', case04_sign_messages), + create_provider('aggregate_sigs', case06_aggregate_sigs), + create_provider('aggregate_pubkeys', case07_aggregate_pubkeys), ]) From 08a52c19a2aba5de962bb789d4cf8adc30ee1c88 Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 03:08:39 +0200 Subject: [PATCH 027/130] update genesis test gen --- test_generators/genesis/main.py | 39 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/test_generators/genesis/main.py b/test_generators/genesis/main.py index 82899b967..6091a63d8 100644 --- a/test_generators/genesis/main.py +++ b/test_generators/genesis/main.py @@ -1,33 +1,34 @@ -from typing import Callable, Iterable +from typing import Iterable from 
eth2spec.test.genesis import test_initialization, test_validity

-from gen_base import gen_runner, gen_suite, gen_typing
+from gen_base import gen_runner, gen_typing
 from gen_from_tests.gen import generate_from_tests
 from preset_loader import loader

 from eth2spec.phase0 import spec as spec


-def create_suite(handler_name: str, config_name: str, get_cases: Callable[[], Iterable[gen_typing.TestCase]]) \
-        -> Callable[[str], gen_typing.TestSuiteOutput]:
-    def suite_definition(configs_path: str) -> gen_typing.TestSuiteOutput:
-        presets = loader.load_presets(configs_path, config_name)
-        spec.apply_constants_preset(presets)
+def create_provider(handler_name: str, tests_src, config_name: str) -> gen_typing.TestProvider:

-        return ("genesis_%s_%s" % (handler_name, config_name), handler_name, gen_suite.render_suite(
-            title="genesis testing",
-            summary="Genesis test suite, %s type, generated from pytests" % handler_name,
-            forks_timeline="testing",
-            forks=["phase0"],
-            config=config_name,
-            runner="genesis",
-            handler=handler_name,
-            test_cases=get_cases()))
-    return suite_definition
+    def prepare_fn(configs_path: str) -> str:
+        presets = loader.load_presets(configs_path, config_name)
+        spec.apply_constants_preset(presets)
+        return config_name
+
+    def cases_fn() -> Iterable[gen_typing.TestCase]:
+        return generate_from_tests(
+            runner_name='genesis',
+            handler_name=handler_name,
+            src=tests_src,
+            fork_name='phase0'
+        )
+
+    return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn)


 if __name__ == "__main__":
     gen_runner.run_generator("genesis", [
-        create_suite('initialization', 'minimal', lambda: generate_from_tests(test_initialization, 'phase0')),
-        create_suite('validity', 'minimal', lambda: generate_from_tests(test_validity, 'phase0')),
+        create_provider('initialization', test_initialization, 'minimal'),
+        create_provider('validity', test_validity, 'minimal'),
     ])

From 156dcfe247bad25099a070f45b11af3ab00b93fc Mon Sep 17 00:00:00 2001
From: protolambda
Date: Sat, 27 Jul 2019 03:09:00 +0200
Subject: [PATCH 028/130] update sanity test gen

---
 test_generators/sanity/main.py | 38 +++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/test_generators/sanity/main.py b/test_generators/sanity/main.py
index fbef4da96..712f51c07 100644
--- a/test_generators/sanity/main.py
+++ b/test_generators/sanity/main.py
@@ -1,37 +1,37 @@
-from typing import Callable, Iterable
+from typing import Iterable

 from eth2spec.test.sanity import test_blocks, test_slots

-from gen_base import gen_runner, gen_suite, gen_typing
+from gen_base import gen_runner, gen_typing
 from gen_from_tests.gen import generate_from_tests
 from preset_loader import loader
 from eth2spec.phase0 import spec as spec_phase0
 from eth2spec.phase1 import spec as spec_phase1


-def create_suite(handler_name: str, config_name: str, get_cases: Callable[[], Iterable[gen_typing.TestCase]]) \
-        -> Callable[[str], gen_typing.TestSuiteOutput]:
-    def suite_definition(configs_path: str) -> gen_typing.TestSuiteOutput:
+def create_provider(handler_name: str, tests_src, config_name: str) -> gen_typing.TestProvider:
+
+    def prepare_fn(configs_path: str) -> str:
         presets = loader.load_presets(configs_path, config_name)
         spec_phase0.apply_constants_preset(presets)
         spec_phase1.apply_constants_preset(presets)
+        return config_name

-    return ("sanity_%s_%s" % (handler_name, config_name), handler_name, gen_suite.render_suite(
-            title="sanity testing",
-            summary="Sanity test 
suite, %s type, generated from pytests" % handler_name, - forks_timeline="testing", - forks=["phase0"], - config=config_name, - runner="sanity", - handler=handler_name, - test_cases=get_cases())) - return suite_definition + def cases_fn() -> Iterable[gen_typing.TestCase]: + return generate_from_tests( + runner_name='sanity', + handler_name=handler_name, + src=tests_src, + fork_name='phase0' + ) + + return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn) if __name__ == "__main__": gen_runner.run_generator("sanity", [ - create_suite('blocks', 'minimal', lambda: generate_from_tests(test_blocks, 'phase0')), - create_suite('blocks', 'mainnet', lambda: generate_from_tests(test_blocks, 'phase0')), - create_suite('slots', 'minimal', lambda: generate_from_tests(test_slots, 'phase0')), - create_suite('slots', 'mainnet', lambda: generate_from_tests(test_slots, 'phase0')), + create_provider('blocks', test_blocks, 'minimal'), + create_provider('blocks', test_blocks, 'mainnet'), + create_provider('slots', test_slots, 'minimal'), + create_provider('slots', test_slots, 'mainnet'), ]) From c628c8187be177eb76621f121fa1f1c4a7b9badb Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 13:34:19 +0200 Subject: [PATCH 029/130] SSZ static format updated to per-case outputs --- test_generators/ssz_static/main.py | 77 +++++++++----------- test_generators/ssz_static/requirements.txt | 1 - test_libs/gen_helpers/gen_base/gen_runner.py | 14 ++-- 3 files changed, 42 insertions(+), 50 deletions(-) diff --git a/test_generators/ssz_static/main.py b/test_generators/ssz_static/main.py index 0dfdebf5d..c9c45a5a0 100644 --- a/test_generators/ssz_static/main.py +++ b/test_generators/ssz_static/main.py @@ -1,5 +1,5 @@ from random import Random - +from typing import Iterable from inspect import getmembers, isclass from eth2spec.debug import random_value, encode @@ -10,29 +10,20 @@ from eth2spec.utils.ssz.ssz_impl import ( signing_root, serialize, ) -from eth_utils import ( - to_tuple, to_dict -) -from gen_base import gen_runner, gen_suite, gen_typing +from gen_base import gen_runner, gen_typing from preset_loader import loader MAX_BYTES_LENGTH = 100 MAX_LIST_LENGTH = 10 -@to_dict -def create_test_case_contents(value): - yield "value", encode.encode(value) - yield "serialized", '0x' + serialize(value).hex() - yield "root", '0x' + hash_tree_root(value).hex() - if hasattr(value, "signature"): - yield "signing_root", '0x' + signing_root(value).hex() - - -@to_dict -def create_test_case(rng: Random, name: str, typ, mode: random_value.RandomizationMode, chaos: bool): +def create_test_case(rng: Random, typ, mode: random_value.RandomizationMode, chaos: bool) -> Iterable[gen_typing.TestCasePart]: value = random_value.get_random_ssz_object(rng, typ, MAX_BYTES_LENGTH, MAX_LIST_LENGTH, mode, chaos) - yield name, create_test_case_contents(value) + yield "value", "data", encode.encode(value) + yield "serialized", "ssz", serialize(value) + yield "root", "meta", '0x' + hash_tree_root(value).hex() + if hasattr(value, "signature"): + yield "signing_root", "meta", '0x' + signing_root(value).hex() def get_spec_ssz_types(): @@ -42,40 +33,38 @@ def get_spec_ssz_types(): ] -@to_tuple -def ssz_static_cases(rng: Random, mode: random_value.RandomizationMode, chaos: bool, count: int): - for (name, ssz_type) in get_spec_ssz_types(): - for i in range(count): - yield create_test_case(rng, name, ssz_type, mode, chaos) +def ssz_static_cases(seed: int, name, ssz_type, mode: random_value.RandomizationMode, chaos: bool, count: int): + 
random_mode_name = mode.to_name() + + # Reproducible RNG + rng = Random(seed) + + for i in range(count): + yield gen_typing.TestCase( + fork_name='phase0', + runner_name='ssz_static', + handler_name=name, + suite_name=f"ssz_{random_mode_name}{'_chaos' if chaos else ''}", + case_name=f"case_{i}", + case_fn=lambda: create_test_case(rng, ssz_type, mode, chaos) + ) -def get_ssz_suite(seed: int, config_name: str, mode: random_value.RandomizationMode, chaos: bool, cases_if_random: int): - def ssz_suite(configs_path: str) -> gen_typing.TestSuiteOutput: +def create_provider(config_name: str, seed: int, mode: random_value.RandomizationMode, chaos: bool, + cases_if_random: int) -> gen_typing.TestProvider: + def prepare_fn(configs_path: str) -> str: # Apply changes to presets, this affects some of the vector types. presets = loader.load_presets(configs_path, config_name) spec.apply_constants_preset(presets) + return config_name - # Reproducible RNG - rng = Random(seed) - - random_mode_name = mode.to_name() - - suite_name = f"ssz_{config_name}_{random_mode_name}{'_chaos' if chaos else ''}" - + def cases_fn() -> Iterable[gen_typing.TestCase]: count = cases_if_random if chaos or mode.is_changing() else 1 - print(f"generating SSZ-static suite ({count} cases per ssz type): {suite_name}") - return (suite_name, "core", gen_suite.render_suite( - title=f"ssz testing, with {config_name} config, randomized with mode {random_mode_name}{' and with chaos applied' if chaos else ''}", - summary="Test suite for ssz serialization and hash-tree-root", - forks_timeline="testing", - forks=["phase0"], - config=config_name, - runner="ssz", - handler="static", - test_cases=ssz_static_cases(rng, mode, chaos, count))) + for (i, (name, ssz_type)) in enumerate(get_spec_ssz_types()): + yield from ssz_static_cases(seed * 1000 + i, name, ssz_type, mode, chaos, count) - return ssz_suite + return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn) if __name__ == "__main__": @@ -91,6 +80,6 @@ if __name__ == "__main__": seed += 1 gen_runner.run_generator("ssz_static", [ - get_ssz_suite(seed, config_name, mode, chaos, cases_if_random) - for (seed, config_name, mode, chaos, cases_if_random) in settings + create_provider(config_name, seed, mode, chaos, cases_if_random) + for (seed, config_name, mode, chaos, cases_if_random) in settings ]) diff --git a/test_generators/ssz_static/requirements.txt b/test_generators/ssz_static/requirements.txt index 595cee69c..3314093d3 100644 --- a/test_generators/ssz_static/requirements.txt +++ b/test_generators/ssz_static/requirements.txt @@ -1,4 +1,3 @@ -eth-utils==1.6.0 ../../test_libs/gen_helpers ../../test_libs/config_helpers ../../test_libs/pyspec \ No newline at end of file diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py b/test_libs/gen_helpers/gen_base/gen_runner.py index f0867db7e..fff3a3436 100644 --- a/test_libs/gen_helpers/gen_base/gen_runner.py +++ b/test_libs/gen_helpers/gen_base/gen_runner.py @@ -108,16 +108,20 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]): for (name, out_kind, data) in test_case.case_fn(): if out_kind == "meta": meta[name] = data - elif out_kind == "data" or out_kind == "ssz": + if out_kind == "data": try: out_path = case_dir / Path(name + '.yaml') with out_path.open(file_mode) as f: yaml.dump(data, f) except IOError as e: - sys.exit(f'Error when dumping test "{case_dir}", part "{name}": {e}') - # if out_kind == "ssz": - # # TODO write SSZ as binary file too. 
- # out_path = case_dir / Path(name + '.ssz') + sys.exit(f'Error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}') + if out_kind == "ssz": + try: + out_path = case_dir / Path(name + '.ssz') + with out_path.open(file_mode + 'b') as f: # write in raw binary mode + f.write(data) + except IOError as e: + sys.exit(f'Error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}') # Once all meta data is collected (if any), write it to a meta data file. if len(meta) != 0: try: From 5b956b3d26ac2cea136e7e6b9e4f141c8f696db4 Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 16:45:30 +0200 Subject: [PATCH 030/130] implement new ssz generic tests --- test_generators/ssz_generic/main.py | 43 +-------- test_generators/ssz_generic/renderers.py | 93 -------------------- test_generators/ssz_generic/requirements.txt | 2 +- test_generators/ssz_generic/ssz_bitlist.py | 33 +++++++ test_generators/ssz_generic/ssz_bitvector.py | 30 +++++++ test_generators/ssz_generic/ssz_boolean.py | 15 ++++ test_generators/ssz_generic/ssz_test_case.py | 21 +++++ test_generators/ssz_generic/ssz_uints.py | 34 +++++++ 8 files changed, 135 insertions(+), 136 deletions(-) delete mode 100644 test_generators/ssz_generic/renderers.py create mode 100644 test_generators/ssz_generic/ssz_bitlist.py create mode 100644 test_generators/ssz_generic/ssz_bitvector.py create mode 100644 test_generators/ssz_generic/ssz_boolean.py create mode 100644 test_generators/ssz_generic/ssz_test_case.py create mode 100644 test_generators/ssz_generic/ssz_uints.py diff --git a/test_generators/ssz_generic/main.py b/test_generators/ssz_generic/main.py index fe01a68d7..2e34aacf4 100644 --- a/test_generators/ssz_generic/main.py +++ b/test_generators/ssz_generic/main.py @@ -1,46 +1,5 @@ -from uint_test_cases import ( - generate_random_uint_test_cases, - generate_uint_wrong_length_test_cases, - generate_uint_bounds_test_cases, - generate_uint_out_of_bounds_test_cases -) -from gen_base import gen_runner, gen_suite, gen_typing - -def ssz_random_uint_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("uint_random", "uint", gen_suite.render_suite( - title="UInt Random", - summary="Random integers chosen uniformly over the allowed value range", - forks_timeline= "mainnet", - forks=["phase0"], - config="mainnet", - runner="ssz", - handler="uint", - test_cases=generate_random_uint_test_cases())) - - -def ssz_wrong_uint_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("uint_wrong_length", "uint", gen_suite.render_suite( - title="UInt Wrong Length", - summary="Serialized integers that are too short or too long", - forks_timeline= "mainnet", - forks=["phase0"], - config="mainnet", - runner="ssz", - handler="uint", - test_cases=generate_uint_wrong_length_test_cases())) - - -def ssz_uint_bounds_suite(configs_path: str) -> gen_typing.TestSuiteOutput: - return ("uint_bounds", "uint", gen_suite.render_suite( - title="UInt Bounds", - summary="Integers right at or beyond the bounds of the allowed value range", - forks_timeline= "mainnet", - forks=["phase0"], - config="mainnet", - runner="ssz", - handler="uint", - test_cases=generate_uint_bounds_test_cases() + generate_uint_out_of_bounds_test_cases())) +from gen_base import gen_runner, gen_typing if __name__ == "__main__": diff --git a/test_generators/ssz_generic/renderers.py b/test_generators/ssz_generic/renderers.py deleted file mode 100644 index 28571cdda..000000000 --- a/test_generators/ssz_generic/renderers.py +++ /dev/null @@ -1,93 +0,0 @@ -from 
collections.abc import ( - Mapping, - Sequence, -) - -from eth_utils import ( - encode_hex, - to_dict, -) - -from ssz.sedes import ( - BaseSedes, - Boolean, - Bytes, - BytesN, - Container, - List, - UInt, -) - - -def render_value(value): - if isinstance(value, bool): - return value - elif isinstance(value, int): - return str(value) - elif isinstance(value, bytes): - return encode_hex(value) - elif isinstance(value, Sequence): - return tuple(render_value(element) for element in value) - elif isinstance(value, Mapping): - return render_dict_value(value) - else: - raise ValueError(f"Cannot render value {value}") - - -@to_dict -def render_dict_value(value): - for key, value in value.items(): - yield key, render_value(value) - - -def render_type_definition(sedes): - if isinstance(sedes, Boolean): - return "bool" - - elif isinstance(sedes, UInt): - return f"uint{sedes.length * 8}" - - elif isinstance(sedes, BytesN): - return f"bytes{sedes.length}" - - elif isinstance(sedes, Bytes): - return f"bytes" - - elif isinstance(sedes, List): - return [render_type_definition(sedes.element_sedes)] - - elif isinstance(sedes, Container): - return { - field_name: render_type_definition(field_sedes) - for field_name, field_sedes in sedes.fields - } - - elif isinstance(sedes, BaseSedes): - raise Exception("Unreachable: All sedes types have been checked") - - else: - raise TypeError("Expected BaseSedes") - - -@to_dict -def render_test_case(*, sedes, valid, value=None, serial=None, description=None, tags=None): - value_and_serial_given = value is not None and serial is not None - if valid: - if not value_and_serial_given: - raise ValueError("For valid test cases, both value and ssz must be present") - else: - if value_and_serial_given: - raise ValueError("For invalid test cases, one of either value or ssz must not be present") - - if tags is None: - tags = [] - - yield "type", render_type_definition(sedes) - yield "valid", valid - if value is not None: - yield "value", render_value(value) - if serial is not None: - yield "ssz", encode_hex(serial) - if description is not None: - yield description - yield "tags", tags diff --git a/test_generators/ssz_generic/requirements.txt b/test_generators/ssz_generic/requirements.txt index dcdb0824f..c540f26b5 100644 --- a/test_generators/ssz_generic/requirements.txt +++ b/test_generators/ssz_generic/requirements.txt @@ -1,4 +1,4 @@ eth-utils==1.6.0 ../../test_libs/gen_helpers ../../test_libs/config_helpers -ssz==0.1.0a2 +../../test_libs/pyspec diff --git a/test_generators/ssz_generic/ssz_bitlist.py b/test_generators/ssz_generic/ssz_bitlist.py new file mode 100644 index 000000000..45303123d --- /dev/null +++ b/test_generators/ssz_generic/ssz_bitlist.py @@ -0,0 +1,33 @@ +from .ssz_test_case import invalid_test_case, valid_test_case +from eth2spec.utils.ssz.ssz_typing import Bitlist +from eth2spec.utils.ssz.ssz_impl import serialize +from random import Random +from eth2spec.debug.random_value import RandomizationMode, get_random_ssz_object + + +def bitlist_case_fn(rng: Random, mode: RandomizationMode, limit: int): + return get_random_ssz_object(rng, Bitlist[limit], + max_bytes_length=(limit // 8) + 1, + max_list_length=limit, + mode=mode, chaos=False) + + +def valid_cases(): + rng = Random(1234) + for size in [1, 2, 3, 4, 5, 8, 16, 31, 512, 513]: + for variation in range(5): + for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]: + yield f'bitlist_{size}_{mode.to_name()}_{variation}', \ + valid_test_case(lambda: bitlist_case_fn(rng, 
mode, size))
+
+
+def invalid_cases():
+    yield 'bitlist_no_delimiter_empty', invalid_test_case(lambda: b'')
+    yield 'bitlist_no_delimiter_zero_byte', invalid_test_case(lambda: b'\x00')
+    yield 'bitlist_no_delimiter_zeroes', invalid_test_case(lambda: b'\x00\x00\x00')
+    rng = Random(1234)
+    for (typ_limit, test_limit) in [(1, 2), (1, 8), (1, 9), (2, 3), (3, 4), (4, 5),
+                                    (5, 6), (8, 9), (32, 64), (32, 33), (512, 513)]:
+        yield f'bitlist_{typ_limit}_but_{test_limit}', \
+            invalid_test_case(lambda: serialize(
+                bitlist_case_fn(rng, RandomizationMode.mode_max_count, test_limit)))
diff --git a/test_generators/ssz_generic/ssz_bitvector.py b/test_generators/ssz_generic/ssz_bitvector.py
new file mode 100644
index 000000000..ab3b6831d
--- /dev/null
+++ b/test_generators/ssz_generic/ssz_bitvector.py
@@ -0,0 +1,30 @@
+from .ssz_test_case import invalid_test_case, valid_test_case
+from eth2spec.utils.ssz.ssz_typing import Bitvector
+from eth2spec.utils.ssz.ssz_impl import serialize
+from random import Random
+from eth2spec.debug.random_value import RandomizationMode, get_random_ssz_object
+
+
+def bitvector_case_fn(rng: Random, mode: RandomizationMode, size: int):
+    return get_random_ssz_object(rng, Bitvector[size],
+                                 max_bytes_length=(size + 7) // 8,
+                                 max_list_length=size,
+                                 mode=mode, chaos=False)
+
+
+def valid_cases():
+    rng = Random(1234)
+    for size in [1, 2, 3, 4, 5, 8, 16, 31, 512, 513]:
+        for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]:
+            yield f'bitvec_{size}_{mode.to_name()}', valid_test_case(lambda: bitvector_case_fn(rng, mode, size))
+
+
+def invalid_cases():
+    # zero-length bitvectors are illegal
+    yield 'bitvec_0', lambda: b''
+    rng = Random(1234)
+    for (typ_size, test_size) in [(1, 2), (2, 3), (3, 4), (4, 5),
+                                  (5, 6), (8, 9), (9, 8), (16, 8), (32, 33), (512, 513)]:
+        for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]:
+            yield f'bitvec_{typ_size}_{mode.to_name()}_{test_size}', \
+                invalid_test_case(lambda: serialize(bitvector_case_fn(rng, mode, test_size)))
diff --git a/test_generators/ssz_generic/ssz_boolean.py b/test_generators/ssz_generic/ssz_boolean.py
new file mode 100644
index 000000000..4463ab3e2
--- /dev/null
+++ b/test_generators/ssz_generic/ssz_boolean.py
@@ -0,0 +1,15 @@
+from .ssz_test_case import valid_test_case, invalid_test_case
+from eth2spec.utils.ssz.ssz_typing import boolean
+
+
+def valid_cases():
+    yield "true", valid_test_case(lambda: boolean(True))
+    yield "false", valid_test_case(lambda: boolean(False))
+
+
+def invalid_cases():
+    yield "byte_2", invalid_test_case(lambda: b'\x02')
+    yield "byte_rev_nibble", invalid_test_case(lambda: b'\x10')
+    yield "byte_0x80", invalid_test_case(lambda: b'\x80')
+    yield "byte_full", invalid_test_case(lambda: b'\xff')
+
diff --git a/test_generators/ssz_generic/ssz_test_case.py b/test_generators/ssz_generic/ssz_test_case.py
new file mode 100644
index 000000000..e6993888c
--- /dev/null
+++ b/test_generators/ssz_generic/ssz_test_case.py
@@ -0,0 +1,21 @@
+from eth2spec.utils.ssz.ssz_impl import serialize, hash_tree_root, signing_root
+from eth2spec.debug.encode import encode
+from eth2spec.utils.ssz.ssz_typing import SSZValue, Container
+from typing import Callable
+
+
+def valid_test_case(value_fn: Callable[[], SSZValue]):
+    def case_fn():
+        value = value_fn()
+        yield "value", "data", encode(value)
+        yield "serialized", "ssz", serialize(value)
+        yield "root", "meta", '0x' + hash_tree_root(value).hex()
+        if isinstance(value, 
Container): + yield "signing_root", "meta", '0x' + signing_root(value).hex() + return case_fn + + +def invalid_test_case(bytez_fn: Callable[[], bytes]): + def case_fn(): + yield "serialized", "ssz", bytez_fn() + return case_fn diff --git a/test_generators/ssz_generic/ssz_uints.py b/test_generators/ssz_generic/ssz_uints.py new file mode 100644 index 000000000..6fb55279d --- /dev/null +++ b/test_generators/ssz_generic/ssz_uints.py @@ -0,0 +1,34 @@ +from .ssz_test_case import invalid_test_case, valid_test_case +from eth2spec.utils.ssz.ssz_typing import BasicType, uint8, uint16, uint32, uint64, uint128, uint256 +from random import Random +from eth2spec.debug.random_value import RandomizationMode, get_random_ssz_object + + +def uint_case_fn(rng: Random, mode: RandomizationMode, typ: BasicType): + return get_random_ssz_object(rng, typ, + max_bytes_length=typ.byte_len, + max_list_length=1, + mode=mode, chaos=False) + + +def valid_cases(): + rng = Random(1234) + for uint_type in [uint8, uint16, uint32, uint64, uint128, uint256]: + yield f'uint_{uint_type.byte_len * 8}_last_byte_empty', \ + valid_test_case(lambda: uint_type((2 ** ((uint_type.byte_len - 1) * 8)) - 1)) + for variation in range(5): + for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]: + yield f'uint_{uint_type.byte_len * 8}_{mode.to_name()}_{variation}', \ + valid_test_case(lambda: uint_case_fn(rng, mode, uint_type)) + + +def invalid_cases(): + for uint_type in [uint8, uint16, uint32, uint64, uint128, uint256]: + yield f'uint_{uint_type.byte_len * 8}_one_too_high', \ + invalid_test_case(lambda: (2 ** (uint_type.byte_len * 8)).to_bytes(uint_type.byte_len + 1, 'little')) + for uint_type in [uint8, uint16, uint32, uint64, uint128, uint256]: + yield f'uint_{uint_type.byte_len * 8}_one_byte_longer', \ + invalid_test_case(lambda: (2 ** (uint_type.byte_len * 8) - 1).to_bytes(uint_type.byte_len + 1, 'little')) + for uint_type in [uint8, uint16, uint32, uint64, uint128, uint256]: + yield f'uint_{uint_type.byte_len * 8}_one_byte_shorter', \ + invalid_test_case(lambda: (2 ** ((uint_type.byte_len - 1) * 8) - 1).to_bytes(uint_type.byte_len - 1, 'little')) From aea823763189d9ed98427ae95da1a3741ef8a126 Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 22:31:04 +0200 Subject: [PATCH 031/130] update tests, implement main call, add basic vector tests --- test_generators/ssz_generic/main.py | 39 ++++++++++++++- .../ssz_generic/ssz_basic_vector.py | 49 +++++++++++++++++++ test_generators/ssz_generic/ssz_bitlist.py | 6 ++- test_generators/ssz_generic/ssz_uints.py | 7 ++- 4 files changed, 96 insertions(+), 5 deletions(-) create mode 100644 test_generators/ssz_generic/ssz_basic_vector.py diff --git a/test_generators/ssz_generic/main.py b/test_generators/ssz_generic/main.py index 2e34aacf4..5f223eb29 100644 --- a/test_generators/ssz_generic/main.py +++ b/test_generators/ssz_generic/main.py @@ -1,6 +1,41 @@ - +from typing import Iterable from gen_base import gen_runner, gen_typing +import ssz_basic_vector +import ssz_bitlist +import ssz_bitvector +import ssz_boolean +import ssz_uints + + +def create_provider(handler_name: str, suite_name: str, case_maker) -> gen_typing.TestProvider: + + def prepare_fn(configs_path: str) -> str: + return "general" + + def cases_fn() -> Iterable[gen_typing.TestCase]: + for (case_name, case_fn) in case_maker(): + yield gen_typing.TestCase( + fork_name='phase0', + runner_name='ssz_generic', + handler_name=handler_name, + suite_name=suite_name, + 
case_name=case_name, + case_fn=case_fn + ) + + return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn) if __name__ == "__main__": - gen_runner.run_generator("ssz_generic", [ssz_random_uint_suite, ssz_wrong_uint_suite, ssz_uint_bounds_suite]) + gen_runner.run_generator("ssz_generic", [ + create_provider("basic_vector", "valid", ssz_basic_vector.valid_cases), + create_provider("basic_vector", "invalid", ssz_basic_vector.invalid_cases), + create_provider("bitlist", "valid", ssz_bitlist.valid_cases), + create_provider("bitlist", "invalid", ssz_bitlist.invalid_cases), + create_provider("bitvector", "valid", ssz_bitvector.valid_cases), + create_provider("bitvector", "invalid", ssz_bitvector.invalid_cases), + create_provider("boolean", "valid", ssz_boolean.valid_cases), + create_provider("boolean", "invalid", ssz_boolean.invalid_cases), + create_provider("uints", "valid", ssz_uints.valid_cases), + create_provider("uints", "invalid", ssz_uints.invalid_cases), + ]) diff --git a/test_generators/ssz_generic/ssz_basic_vector.py b/test_generators/ssz_generic/ssz_basic_vector.py new file mode 100644 index 000000000..fa51113d9 --- /dev/null +++ b/test_generators/ssz_generic/ssz_basic_vector.py @@ -0,0 +1,49 @@ +from .ssz_test_case import invalid_test_case, valid_test_case +from eth2spec.utils.ssz.ssz_typing import boolean, uint8, uint16, uint32, uint64, uint128, uint256, Vector, BasicType +from eth2spec.utils.ssz.ssz_impl import serialize +from random import Random +from typing import Dict +from eth2spec.debug.random_value import RandomizationMode, get_random_ssz_object + + +def basic_vector_case_fn(rng: Random, mode: RandomizationMode, elem_type: BasicType, length: int): + return get_random_ssz_object(rng, Vector[elem_type, length], + max_bytes_length=length * 8, + max_list_length=length, + mode=mode, chaos=False) + + +BASIC_TYPES: Dict[str, BasicType] = { + 'bool': boolean, + 'uint8': uint8, + 'uint16': uint16, + 'uint32': uint32, + 'uint64': uint64, + 'uint128': uint128, + 'uint256': uint256 +} + + +def valid_cases(): + rng = Random(1234) + for (name, typ) in BASIC_TYPES.items(): + for length in [1, 2, 3, 4, 5, 8, 16, 31, 512, 513]: + for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]: + yield f'vec_{name}_{length}_{mode.to_name()}',\ + valid_test_case(lambda: basic_vector_case_fn(rng, mode, typ, length)) + + +def invalid_cases(): + # zero length vectors are illegal + for (name, typ) in BASIC_TYPES: + yield f'vec_{name}_0', lambda: b'' + + rng = Random(1234) + for (name, typ) in BASIC_TYPES.items(): + for length in [1, 2, 3, 4, 5, 8, 16, 31, 512, 513]: + yield f'vec_{name}_{length}_nil', invalid_test_case(lambda: b'') + for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]: + yield f'vec_{name}_{length}_{mode.to_name()}_one_less', \ + invalid_test_case(lambda: serialize(basic_vector_case_fn(rng, mode, typ, length - 1))) + yield f'vec_{name}_{length}_{mode.to_name()}_one_more', \ + invalid_test_case(lambda: serialize(basic_vector_case_fn(rng, mode, typ, length + 1))) diff --git a/test_generators/ssz_generic/ssz_bitlist.py b/test_generators/ssz_generic/ssz_bitlist.py index 45303123d..e0c756aeb 100644 --- a/test_generators/ssz_generic/ssz_bitlist.py +++ b/test_generators/ssz_generic/ssz_bitlist.py @@ -16,7 +16,11 @@ def valid_cases(): rng = Random(1234) for size in [1, 2, 3, 4, 5, 8, 16, 31, 512, 513]: for variation in range(5): - for mode in [RandomizationMode.mode_random, 
RandomizationMode.mode_zero, RandomizationMode.mode_max]: + for mode in [RandomizationMode.mode_nil_count, + RandomizationMode.mode_max_count, + RandomizationMode.mode_random, + RandomizationMode.mode_zero, + RandomizationMode.mode_max]: yield f'bitlist_{size}_{mode.to_name()}_{variation}', \ valid_test_case(lambda: bitlist_case_fn(rng, mode, size)) diff --git a/test_generators/ssz_generic/ssz_uints.py b/test_generators/ssz_generic/ssz_uints.py index 6fb55279d..93af6b91e 100644 --- a/test_generators/ssz_generic/ssz_uints.py +++ b/test_generators/ssz_generic/ssz_uints.py @@ -11,9 +11,12 @@ def uint_case_fn(rng: Random, mode: RandomizationMode, typ: BasicType): mode=mode, chaos=False) +UINT_TYPES = [uint8, uint16, uint32, uint64, uint128, uint256] + + def valid_cases(): rng = Random(1234) - for uint_type in [uint8, uint16, uint32, uint64, uint128, uint256]: + for uint_type in UINT_TYPES: yield f'uint_{uint_type.byte_len * 8}_last_byte_empty', \ valid_test_case(lambda: uint_type((2 ** ((uint_type.byte_len - 1) * 8)) - 1)) for variation in range(5): @@ -23,7 +26,7 @@ def valid_cases(): def invalid_cases(): - for uint_type in [uint8, uint16, uint32, uint64, uint128, uint256]: + for uint_type in UINT_TYPES: yield f'uint_{uint_type.byte_len * 8}_one_too_high', \ invalid_test_case(lambda: (2 ** (uint_type.byte_len * 8)).to_bytes(uint_type.byte_len + 1, 'little')) for uint_type in [uint8, uint16, uint32, uint64, uint128, uint256]: From 88dbd18394047b2f040ba6f5bfb02c739125cc46 Mon Sep 17 00:00:00 2001 From: protolambda Date: Sat, 27 Jul 2019 23:57:07 +0200 Subject: [PATCH 032/130] fix imports, new container tests, update randomization logic --- test_generators/ssz_generic/main.py | 3 + .../ssz_generic/ssz_basic_vector.py | 23 +++- test_generators/ssz_generic/ssz_bitlist.py | 2 +- test_generators/ssz_generic/ssz_bitvector.py | 4 +- test_generators/ssz_generic/ssz_boolean.py | 2 +- test_generators/ssz_generic/ssz_container.py | 120 ++++++++++++++++++ test_generators/ssz_generic/ssz_uints.py | 2 +- 7 files changed, 145 insertions(+), 11 deletions(-) create mode 100644 test_generators/ssz_generic/ssz_container.py diff --git a/test_generators/ssz_generic/main.py b/test_generators/ssz_generic/main.py index 5f223eb29..83e6da86d 100644 --- a/test_generators/ssz_generic/main.py +++ b/test_generators/ssz_generic/main.py @@ -5,6 +5,7 @@ import ssz_bitlist import ssz_bitvector import ssz_boolean import ssz_uints +import ssz_container def create_provider(handler_name: str, suite_name: str, case_maker) -> gen_typing.TestProvider: @@ -38,4 +39,6 @@ if __name__ == "__main__": create_provider("boolean", "invalid", ssz_boolean.invalid_cases), create_provider("uints", "valid", ssz_uints.valid_cases), create_provider("uints", "invalid", ssz_uints.invalid_cases), + create_provider("containers", "valid", ssz_container.valid_cases), + create_provider("containers", "invalid", ssz_container.invalid_cases), ]) diff --git a/test_generators/ssz_generic/ssz_basic_vector.py b/test_generators/ssz_generic/ssz_basic_vector.py index fa51113d9..6e7e08daa 100644 --- a/test_generators/ssz_generic/ssz_basic_vector.py +++ b/test_generators/ssz_generic/ssz_basic_vector.py @@ -1,4 +1,4 @@ -from .ssz_test_case import invalid_test_case, valid_test_case +from ssz_test_case import invalid_test_case, valid_test_case from eth2spec.utils.ssz.ssz_typing import boolean, uint8, uint16, uint32, uint64, uint128, uint256, Vector, BasicType from eth2spec.utils.ssz.ssz_impl import serialize from random import Random @@ -27,23 +27,34 @@ BASIC_TYPES: 
Dict[str, BasicType] = {
 def valid_cases():
     rng = Random(1234)
     for (name, typ) in BASIC_TYPES.items():
+        random_modes = [RandomizationMode.mode_zero, RandomizationMode.mode_max]
+        if name != 'bool':
+            random_modes.append(RandomizationMode.mode_random)
         for length in [1, 2, 3, 4, 5, 8, 16, 31, 512, 513]:
-            for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]:
-                yield f'vec_{name}_{length}_{mode.to_name()}',\
+            for mode in random_modes:
+                yield f'vec_{name}_{length}_{mode.to_name()}', \
                     valid_test_case(lambda: basic_vector_case_fn(rng, mode, typ, length))


 def invalid_cases():
     # zero length vectors are illegal
-    for (name, typ) in BASIC_TYPES:
-        yield f'vec_{name}_0', lambda: b''
+    for (name, typ) in BASIC_TYPES.items():
+        yield f'vec_{name}_0', invalid_test_case(lambda: b'')

     rng = Random(1234)
     for (name, typ) in BASIC_TYPES.items():
+        random_modes = [RandomizationMode.mode_zero, RandomizationMode.mode_max]
+        if name != 'bool':
+            random_modes.append(RandomizationMode.mode_random)
         for length in [1, 2, 3, 4, 5, 8, 16, 31, 512, 513]:
             yield f'vec_{name}_{length}_nil', invalid_test_case(lambda: b'')
-            for mode in [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]:
+            for mode in random_modes:
                 yield f'vec_{name}_{length}_{mode.to_name()}_one_less', \
                     invalid_test_case(lambda: serialize(basic_vector_case_fn(rng, mode, typ, length - 1)))
                 yield f'vec_{name}_{length}_{mode.to_name()}_one_more', \
                     invalid_test_case(lambda: serialize(basic_vector_case_fn(rng, mode, typ, length + 1)))
+                yield f'vec_{name}_{length}_{mode.to_name()}_one_byte_less', \
+                    invalid_test_case(lambda: serialize(basic_vector_case_fn(rng, mode, typ, length))[:-1])
+                yield f'vec_{name}_{length}_{mode.to_name()}_one_byte_more', \
+                    invalid_test_case(lambda: serialize(basic_vector_case_fn(rng, mode, typ, length))
+                                      + serialize(basic_vector_case_fn(rng, mode, uint8, 1)))
diff --git a/test_generators/ssz_generic/ssz_bitlist.py b/test_generators/ssz_generic/ssz_bitlist.py
index e0c756aeb..d1a940eee 100644
--- a/test_generators/ssz_generic/ssz_bitlist.py
+++ b/test_generators/ssz_generic/ssz_bitlist.py
@@ -1,4 +1,4 @@
-from .ssz_test_case import invalid_test_case, valid_test_case
+from ssz_test_case import invalid_test_case, valid_test_case
 from eth2spec.utils.ssz.ssz_typing import Bitlist
 from eth2spec.utils.ssz.ssz_impl import serialize
 from random import Random
diff --git a/test_generators/ssz_generic/ssz_bitvector.py b/test_generators/ssz_generic/ssz_bitvector.py
index ab3b6831d..2b04577e8 100644
--- a/test_generators/ssz_generic/ssz_bitvector.py
+++ b/test_generators/ssz_generic/ssz_bitvector.py
@@ -1,4 +1,4 @@
-from .ssz_test_case import invalid_test_case, valid_test_case
+from ssz_test_case import invalid_test_case, valid_test_case
 from eth2spec.utils.ssz.ssz_typing import Bitvector
 from eth2spec.utils.ssz.ssz_impl import serialize
 from random import Random
@@ -21,7 +21,7 @@ def valid_cases():

 def invalid_cases():
     # zero-length bitvectors are illegal
-    yield 'bitvec_0', lambda: b''
+    yield 'bitvec_0', invalid_test_case(lambda: b'')
     rng = Random(1234)
     for (typ_size, test_size) in [(1, 2), (2, 3), (3, 4), (4, 5),
                                   (5, 6), (8, 9), (9, 8), (16, 8), (32, 33), (512, 513)]:
diff --git a/test_generators/ssz_generic/ssz_boolean.py b/test_generators/ssz_generic/ssz_boolean.py
index 4463ab3e2..9ff36ba88 100644
--- a/test_generators/ssz_generic/ssz_boolean.py
+++ b/test_generators/ssz_generic/ssz_boolean.py
@@ -1,4 +1,4 @@
-from .ssz_test_case import 
valid_test_case, invalid_test_case
+from ssz_test_case import valid_test_case, invalid_test_case
 from eth2spec.utils.ssz.ssz_typing import boolean
diff --git a/test_generators/ssz_generic/ssz_container.py b/test_generators/ssz_generic/ssz_container.py
new file mode 100644
index 000000000..7dbd5e111
--- /dev/null
+++ b/test_generators/ssz_generic/ssz_container.py
@@ -0,0 +1,120 @@
+from ssz_test_case import invalid_test_case, valid_test_case
+from eth2spec.utils.ssz.ssz_typing import SSZType, Container, byte, uint8, uint16, \
+    uint32, uint64, List, Bytes, Vector, Bitvector, Bitlist
+from eth2spec.utils.ssz.ssz_impl import serialize
+from random import Random
+from typing import Dict, Tuple, Sequence, Callable
+from eth2spec.debug.random_value import RandomizationMode, get_random_ssz_object
+
+
+class SingleFieldTestStruct(Container):
+    A: byte
+
+
+class SmallTestStruct(Container):
+    A: uint16
+    B: uint16
+
+
+class FixedTestStruct(Container):
+    A: uint8
+    B: uint64
+    C: uint32
+
+
+class VarTestStruct(Container):
+    A: uint16
+    B: List[uint16, 1024]
+    C: uint8
+
+
+class ComplexTestStruct(Container):
+    A: uint16
+    B: List[uint16, 128]
+    C: uint8
+    D: Bytes[256]
+    E: VarTestStruct
+    F: Vector[FixedTestStruct, 4]
+    G: Vector[VarTestStruct, 2]
+
+
+class BitsStruct(Container):
+    A: Bitlist[5]
+    B: Bitvector[2]
+    C: Bitvector[1]
+    D: Bitlist[6]
+    E: Bitvector[8]
+
+
+def container_case_fn(rng: Random, mode: RandomizationMode, typ: SSZType):
+    return get_random_ssz_object(rng, typ,
+                                 max_bytes_length=2000,
+                                 max_list_length=2000,
+                                 mode=mode, chaos=False)
+
+
+PRESET_CONTAINERS: Dict[str, Tuple[SSZType, Sequence[int]]] = {
+    'SingleFieldTestStruct': (SingleFieldTestStruct, []),
+    'SmallTestStruct': (SmallTestStruct, []),
+    'FixedTestStruct': (FixedTestStruct, []),
+    'VarTestStruct': (VarTestStruct, [2]),
+    'ComplexTestStruct': (ComplexTestStruct, [2, 2 + 4 + 1, 2 + 4 + 1 + 4]),
+    'BitsStruct': (BitsStruct, [0, 4 + 1 + 1, 4 + 1 + 1 + 4]),
+}
+
+
+def valid_cases():
+    rng = Random(1234)
+    for (name, (typ, offsets)) in PRESET_CONTAINERS.items():
+        for mode in [RandomizationMode.mode_zero, RandomizationMode.mode_max]:
+            yield f'{name}_{mode.to_name()}', valid_test_case(lambda: container_case_fn(rng, mode, typ))
+        random_modes = [RandomizationMode.mode_random, RandomizationMode.mode_zero, RandomizationMode.mode_max]
+        if len(offsets) != 0:
+            random_modes.extend([RandomizationMode.mode_nil_count,
+                                 RandomizationMode.mode_one_count,
+                                 RandomizationMode.mode_max_count])
+        for mode in random_modes:
+            for variation in range(10):
+                yield f'{name}_{mode.to_name()}_{variation}', \
+                    valid_test_case(lambda: container_case_fn(rng, mode, typ))
+            for variation in range(3):
+                yield f'{name}_{mode.to_name()}_chaos_{variation}', \
+                    valid_test_case(lambda: container_case_fn(rng, mode, typ))
+
+
+def mod_offset(b: bytes, offset_index: int, change: Callable[[int], int]):
+    return b[:offset_index] + \
+           (change(int.from_bytes(b[offset_index:offset_index + 4], byteorder='little')) & 0xffffffff) \
+           .to_bytes(length=4, byteorder='little') + \
+           b[offset_index + 4:]
+
+
+def invalid_cases():
+    rng = Random(1234)
+    for (name, (typ, offsets)) in PRESET_CONTAINERS.items():
+        # using mode_max_count, so that the extra byte cannot be picked up as normal list content
+        yield f'{name}_extra_byte', \
+            invalid_test_case(lambda: serialize(
+                container_case_fn(rng, RandomizationMode.mode_max_count, typ)) + b'\xff')
+
+        if len(offsets) != 0:
+            # Note: there are many more ways to have invalid offsets,
+            # these are just examples to get clients started on hardening ssz.
+            for mode in [RandomizationMode.mode_random,
+                         RandomizationMode.mode_nil_count,
+                         RandomizationMode.mode_one_count,
+                         RandomizationMode.mode_max_count]:
+                if len(offsets) != 0:
+                    for offset_index in offsets:
+                        yield f'{name}_offset_{offset_index}_plus_one', \
+                            invalid_test_case(lambda: mod_offset(
+                                b=serialize(container_case_fn(rng, mode, typ)),
+                                offset_index=offset_index,
+                                change=lambda x: x + 1
+                            ))
+                        yield f'{name}_offset_{offset_index}_zeroed', \
+                            invalid_test_case(lambda: mod_offset(
+                                b=serialize(container_case_fn(rng, mode, typ)),
+                                offset_index=offset_index,
+                                change=lambda x: 0
+                            ))
diff --git a/test_generators/ssz_generic/ssz_uints.py b/test_generators/ssz_generic/ssz_uints.py
index 93af6b91e..b21fb251c 100644
--- a/test_generators/ssz_generic/ssz_uints.py
+++ b/test_generators/ssz_generic/ssz_uints.py
@@ -1,4 +1,4 @@
-from .ssz_test_case import invalid_test_case, valid_test_case
+from ssz_test_case import invalid_test_case, valid_test_case
 from eth2spec.utils.ssz.ssz_typing import BasicType, uint8, uint16, uint32, uint64, uint128, uint256
 from random import Random
 from eth2spec.debug.random_value import RandomizationMode, get_random_ssz_object

From adb6bff3658f8830cf8ad1533ba287e2920cb2a3 Mon Sep 17 00:00:00 2001
From: protolambda
Date: Sat, 27 Jul 2019 23:57:57 +0200
Subject: [PATCH 033/130] make random value generator respect byte list type limit

---
 test_libs/pyspec/eth2spec/debug/random_value.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test_libs/pyspec/eth2spec/debug/random_value.py b/test_libs/pyspec/eth2spec/debug/random_value.py
index 95a3ae970..9a7d47239 100644
--- a/test_libs/pyspec/eth2spec/debug/random_value.py
+++ b/test_libs/pyspec/eth2spec/debug/random_value.py
@@ -56,15 +56,15 @@ def get_random_ssz_object(rng: Random,
         if mode == RandomizationMode.mode_nil_count:
             return typ(b'')
         elif mode == RandomizationMode.mode_max_count:
-            return typ(get_random_bytes_list(rng, max_bytes_length))
+            return typ(get_random_bytes_list(rng, min(max_bytes_length, typ.length)))
         elif mode == RandomizationMode.mode_one_count:
-            return typ(get_random_bytes_list(rng, 1))
+            return typ(get_random_bytes_list(rng, min(1, typ.length)))
         elif mode == RandomizationMode.mode_zero:
-            return typ(b'\x00')
+            return typ(b'\x00' * min(1, typ.length))
         elif mode == RandomizationMode.mode_max:
-            return typ(b'\xff')
+            return typ(b'\xff' * min(1, typ.length))
         else:
-            return typ(get_random_bytes_list(rng, rng.randint(0, max_bytes_length)))
+            return typ(get_random_bytes_list(rng, rng.randint(0, min(max_bytes_length, typ.length))))
     elif issubclass(typ, BytesN):
         # Sanity, don't generate absurdly big random values
         # If a client is aiming to performance-test, they should create a benchmark suite. 
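For a concrete sense of the invalid cases built in the `ssz_uints` module above, here is a quick worked check for `uint16` (`byte_len == 2`), following the same expressions; this snippet is illustrative only and not part of any patch:

```python
# uint16 invalid serializations, per the expressions in ssz_uints.py:
assert (2 ** 16).to_bytes(2 + 1, 'little') == b'\x00\x00\x01'      # one_too_high: 65536 needs a third byte
assert (2 ** 16 - 1).to_bytes(2 + 1, 'little') == b'\xff\xff\x00'  # one_byte_longer: max uint16, padded to 3 bytes
assert (2 ** 8 - 1).to_bytes(2 - 1, 'little') == b'\xff'           # one_byte_shorter: max 1-byte value, 1 byte short
```

All three should be rejected by a deserializer, since a little-endian `uint16` must be exactly 2 bytes.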
From 4b2b5815c919ed7263ae1a5105854a9b73ed1ef9 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Mon, 29 Jul 2019 16:43:55 -0400 Subject: [PATCH 034/130] Add shard state transition function --- specs/core/1_shard-data-chains.md | 216 +++++++++++++++++++++++------- 1 file changed, 170 insertions(+), 46 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index fc839930f..1c94741d8 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -34,6 +34,7 @@ - [`pad`](#pad) - [`flatten_shard_header`](#flatten_shard_header) - [`compute_crosslink_data_root`](#compute_crosslink_data_root) + - [`get_default_shard_state`](#get_default_shard_state) - [Object validity](#object-validity) - [Shard blocks](#shard-blocks) - [Beacon attestations](#beacon-attestations) @@ -61,8 +62,11 @@ We define the following Python custom types for type hinting and readability: | - | - | | `SHARD_HEADER_SIZE` | `2**9` (= 512) | | `SHARD_BLOCK_SIZE_LIMIT` | `2**16` (= 65,536) | +| `SHARD_BLOCK_SIZE_TARGET` | `2**14` (= 16,384) | | `SHARD_SLOTS_PER_BEACON_SLOT` | `2**1` (= 2) | | `MAX_PERSISTENT_COMMITTEE_SIZE` | `2**7` (= 128) | +| `REWARD_COEFFICIENT_BASE` | `2**20` ( = 1,048,576) | +| `BASEFEE_ADJUSTMENT_FACTOR` | `2**3` (= 8) | ### Initial values @@ -148,6 +152,31 @@ class ExtendedShardBlockCore(Container): attester_bitfield: Bitvector[MAX_PERSISTENT_COMMITTEE_SIZE * 2] ``` +### `ShardState` + +```python +class ShardState(Container): + history_acc: Vector[Hash, 64] + earlier_committee_rewards: List[uint64, MAX_PERSISTENT_COMMITTEE_SIZE] + later_committee_rewards: List[uint64, MAX_PERSISTENT_COMMITTEE_SIZE] + earlier_committee_fees: List[Gwei, MAX_PERSISTENT_COMMITTEE_SIZE] + later_committee_fees: List[Gwei, MAX_PERSISTENT_COMMITTEE_SIZE] + basefee: Gwei + slot: ShardSlot + shard: Shard + most_recent_block_core: ShardBlockCore + receipt_root: Hash +``` + +### `ShardReceiptDelta` + +```python +class ShardReceiptDelta(Container): + index: ValidatorIndex + reward_coefficient: uint64 + block_fee: Gwei +``` + ## Helper functions ### `compute_slot_of_shard_slot` @@ -167,7 +196,7 @@ def compute_epoch_of_shard_slot(slot: ShardSlot) -> Epoch: ### `get_shard_period_start_epoch` ```python -def get_shard_period_start_epoch(epoch: Epoch, lookback: Epoch=Epoch(0)) -> Epoch: +def get_shard_period_start_epoch(epoch: Epoch, lookback: uint64=0) -> Epoch: return Epoch(epoch - (epoch % EPOCHS_PER_SHARD_PERIOD) - lookback * EPOCHS_PER_SHARD_PERIOD) ``` @@ -201,8 +230,8 @@ def get_persistent_committee(state: BeaconState, """ epoch = compute_epoch_of_shard_slot(slot) - earlier_committee = get_period_committee(state, get_shard_period_start_epoch(epoch, lookback=Epoch(2)), shard) - later_committee = get_period_committee(state, get_shard_period_start_epoch(epoch, lookback=Epoch(1)), shard) + earlier_committee = get_period_committee(state, get_shard_period_start_epoch(epoch, lookback=2), shard) + later_committee = get_period_committee(state, get_shard_period_start_epoch(epoch, lookback=1), shard) # Take not-yet-cycled-out validators from earlier committee and already-cycled-in validators from # later committee; return a sorted list of the union of the two, deduplicated @@ -299,60 +328,130 @@ def compute_crosslink_data_root(blocks: Sequence[ShardBlock]) -> Hash: return hash_tree_root(BytesN[MAX_SIZE](pad(header + footer, MAX_SIZE))) ``` -## Object validity - -### Shard blocks - -Let: - -- `beacon_blocks` be the `BeaconBlock` list such that `beacon_blocks[slot]` is the canonical 
`BeaconBlock` at slot `slot`
-- `beacon_state` be the canonical `BeaconState` after processing `beacon_blocks[-1]`
-- `shard` is the shard ID
-- `valid_shard_blocks` be the list of valid `ShardBlock`, recursively defined
-- `candidate` be a candidate `ShardBlock` for which validity is to be determined by running `is_valid_shard_block`
+### `get_default_shard_state`
 
 ```python
-def is_valid_shard_block(beacon_state: BeaconState,
-                         beacon_blocks: Sequence[BeaconBlock],
-                         shard: Shard,
-                         valid_shard_blocks: Sequence[ShardBlock],
-                         candidate: ShardBlock) -> bool:
-    # Check if block is already determined valid
-    for _, block in enumerate(valid_shard_blocks):
-        if candidate == block:
-            return True
+def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardState:
+    earlier_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD * 2, shard)
+    later_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD, shard)
+    return ShardState(
+        basefee=1,
+        shard=shard,
+        slot=PHASE_1_FORK_SLOT,
+        earlier_committee_rewards=[REWARD_COEFFICIENT_BASE for _ in range(len(earlier_committee))],
+        later_committee_rewards=[REWARD_COEFFICIENT_BASE for _ in range(len(later_committee))],
+        earlier_committee_fees=[0 for _ in range(len(earlier_committee))],
+        later_committee_fees=[0 for _ in range(len(later_committee))],
+    )
+```
+
+## Object validity
+
+### Shard block validation: preliminary
+
+Accept a shard block `block` only if all of the following are correct:
+
+* Either `block.core.parent_root == ZERO_HASH` or a block `parent` such that `hash_tree_root(parent.core) == block.core.parent_root` has already been accepted.
+* `block.core.beacon_chain_root == get_block_root(head_beacon_state, compute_epoch_of_shard_slot(parent.core.slot))` where `head_beacon_state` is the current beacon chain head state. Alternatively phrased, a beacon chain block `beacon_ref` such that `signing_root(beacon_ref) == block.core.beacon_chain_root` has already been accepted and is part of the canonical chain, and no block with slot `beacon_ref.slot < slot <= compute_start_slot_of_epoch(compute_epoch_of_shard_slot(parent.core.slot))` is part of the canonical chain.
+* Let `beacon_state` be the state where `beacon_ref.state_root == hash_tree_root(beacon_state)`. Let `prev_state` be the post-state of the `parent` if the `parent` exists, otherwise let it be `get_default_shard_state(beacon_state, shard)` (defined above). `block.core.state_root` must equal the `hash_tree_root` of the state after applying `shard_state_transition(prev_state, beacon_state, block)`.
+
+Note that these acceptance conditions depend on the canonical beacon chain; when the canonical beacon chain reorganizes, the eligibility of shard blocks should be re-evaluated.
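The fee logic in `shard_block_transition` below adjusts `basefee` towards the block size target, EIP-1559-style. An editorial worked example of that arithmetic, using the constants from the table above and a hypothetical current basefee of 1000 Gwei:

```python
SHARD_BLOCK_SIZE_LIMIT = 2**16       # 65,536
SHARD_BLOCK_SIZE_TARGET = 2**14      # 16,384
BASEFEE_ADJUSTMENT_FACTOR = 2**3     # 8
QUOTIENT = SHARD_BLOCK_SIZE_LIMIT * BASEFEE_ADJUSTMENT_FACTOR  # 524,288

basefee = 1000                       # Gwei, hypothetical current value
block_size = SHARD_BLOCK_SIZE_LIMIT  # a completely full block

# Overshooting the target by 49,152 bytes raises the basefee by
# 1000 * 49,152 // 524,288 = 93 Gwei, i.e. just under 10% per full block.
delta = max(1, basefee * (block_size - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)
assert delta == 93
```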
+
+### Shard state transition function helpers
+
+```python
+def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: uint):
+    epoch = compute_epoch_of_shard_slot(state.slot)
+    earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard)
+    later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard)
+    if index in earlier_committee:
+        state.earlier_committee_rewards[earlier_committee.index(index)] += delta
+    elif index in later_committee:
+        state.later_committee_rewards[later_committee.index(index)] += delta
+    else:
+        raise Exception("Should never be here")
+```
+
+```python
+def add_fee(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: uint):
+    epoch = compute_epoch_of_shard_slot(state.slot)
+    earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard)
+    later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard)
+    if index in earlier_committee:
+        state.earlier_committee_fees[earlier_committee.index(index)] += delta
+    elif index in later_committee:
+        state.later_committee_fees[later_committee.index(index)] += delta
+    else:
+        raise Exception("Should never be here")
+```
+
+### Shard state transition function
+
+```python
+def shard_state_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock):
+    assert block.core.slot > state.slot
+    for slot in range(state.slot, block.core.slot):
+        shard_slot_transition(state, beacon_state)
+    shard_block_transition(state, beacon_state, block)
+```
+
+```python
+def shard_slot_transition(state: ShardState, beacon_state: BeaconState):
+    # Correct saved state root
+    if state.most_recent_block_core.state_root == ZERO_HASH:
+        state.most_recent_block_core.state_root = hash_tree_root(state)
+
+    # Save states in history accumulator
+    depth = 0
+    h = hash_tree_root(state)
+    while state.slot % 2**depth == 0 and depth < len(state.history_acc):
+        state.history_acc[depth] = h
+        depth += 1
+
+    # Period transitions
+    if (state.slot + 1) % (SHARD_SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD) == 0:
+        epoch = compute_epoch_of_shard_slot(state.slot)
+        earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard)
+        later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard)
+        state.receipt_root = hash_tree_root(List[ShardReceiptDelta, PLACEHOLDER]([
+            ShardReceiptDelta(
+                index=index,
+                reward_coefficient=state.earlier_committee_rewards[i],
+                block_fee=state.earlier_committee_fees[i],
+            )
+            for i, index in enumerate(earlier_committee)
+        ]))
+        state.earlier_committee_rewards = state.later_committee_rewards
+        state.earlier_committee_fees = state.later_committee_fees
+        state.later_committee_rewards = [REWARD_COEFFICIENT_BASE for _ in range(len(later_committee))]
+        state.later_committee_fees = [0 for _ in range(len(later_committee))]
+    else:
+        state.receipt_root = ZERO_HASH
+    state.slot += 1
+```
+
+```python
+def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock):
     # Check slot number
-    assert compute_slot_of_shard_slot(candidate.core.slot) >= PHASE_1_FORK_SLOT
-
-    # Check beacon block
-    beacon_block_slot = compute_start_slot_of_epoch(compute_epoch_of_shard_slot(candidate.core.slot))
-    beacon_block = beacon_blocks[beacon_block_slot]
-    assert candidate.core.beacon_block_root == signing_root(beacon_block)
-    assert beacon_block.slot <=
candidate.core.slot
-
-    # Check state root
-    assert candidate.core.state_root == Hash()  # [to be removed in phase 2]
-
+    assert block.core.slot == state.slot
+
     # Check parent block
     if block.core.parent_root != Hash():
-        parent_block = next(
-            (block for block in valid_shard_blocks if hash_tree_root(block.core) == candidate.core.parent_root),
-            None
-        )
-        assert parent_block is not None
-        assert parent_block.core.slot < candidate.core.slot
-        parent_beacon_block_slot = compute_start_slot_of_epoch(compute_epoch_of_shard_slot(parent_block.core.slot))
-        assert signing_root(beacon_blocks[parent_beacon_block_slot]) == parent_block.core.beacon_chain_root
-
+        assert block.core.parent_root == hash_tree_root(state.most_recent_block_core)
+
+    # Calculate base reward
+    total_balance = get_total_active_balance(beacon_state)
+    base_reward = Gwei(REWARD_COEFFICIENT_BASE * BASE_REWARD_FACTOR // integer_squareroot(total_balance) // BASE_REWARDS_PER_EPOCH)
+
     # Check attestations
     attester_committee = get_persistent_committee(beacon_state, state.shard, block.core.slot)
     pubkeys = []
+    attestations = 0
+
     for i, index in enumerate(attester_committee):
         if block.core.attester_bitfield[i]:
             pubkeys.append(beacon_state.validators[index].pubkey)
-    for i in range(len(attester_committee), MAX_PERSISTENT_COMMITTEE_SIZE * 2):
+            add_reward(state, beacon_state, index, base_reward)
+            attestations += 1
+
+    for i in range(len(attester_committee), MAX_PERSISTENT_COMMITTEE_SIZE):
         assert block.core.attester_bitfield[i] is False
+
     assert bls_verify(
         pubkey=bls_aggregate_pubkeys(pubkeys),
         message_hash=block.core.parent_root,
@@ -363,14 +462,39 @@ def is_valid_shard_block(beacon_state: BeaconState,
     # Check proposer
     proposer_index = get_shard_block_proposer_index(beacon_state, state.shard, block.core.slot)
     assert proposer_index is not None
+    add_reward(state, beacon_state, proposer_index, attestations * base_reward // PROPOSER_REWARD_QUOTIENT)
     assert bls_verify(
         pubkey=beacon_state.validators[proposer_index].pubkey,
         message_hash=hash_tree_root(block.core),
         signature=block.signatures.proposer_signature,
         domain=get_domain(beacon_state, DOMAIN_SHARD_PROPOSER, compute_epoch_of_shard_slot(block.core.slot)),
     )
-
-    return True
+
+    # Process and update block data fees
+    add_fee(state, beacon_state, proposer_index, state.basefee * len(block.core.data) // SHARD_BLOCK_SIZE_LIMIT)
+    QUOTIENT = SHARD_BLOCK_SIZE_LIMIT * BASEFEE_ADJUSTMENT_FACTOR
+    if len(block.core.data) > SHARD_BLOCK_SIZE_TARGET:
+        state.basefee += max(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)
+    elif len(block.core.data) < SHARD_BLOCK_SIZE_TARGET:
+        state.basefee -= max(1, state.basefee * (SHARD_BLOCK_SIZE_TARGET - len(block.core.data)) // QUOTIENT)
+    state.basefee = max(1, min(EFFECTIVE_BALANCE_INCREMENT // EPOCHS_PER_SHARD_PERIOD // SHARD_SLOTS_PER_EPOCH, state.basefee))
+
+    # Check total bytes
+    assert block.core.total_bytes == state.most_recent_block_core.total_bytes + len(block.core.data)
+
+    # Update in-state block header
+    state.most_recent_block_core = ShardBlockCore(
+        slot=block.core.slot,
+        beacon_chain_root=block.core.beacon_chain_root,
+        parent_root=block.core.parent_root,
+        data_root=block.core.data_root,
+        state_root=ZERO_HASH,
+        total_bytes=block.core.total_bytes,
+        attester_bitfield=block.core.attester_bitfield
+    )
+
+    # Check state root
+    assert hash_tree_root(state) == block.core.state_root
 ```
 
 ### Beacon attestations

From eb7c3b9651c3da7b46644fc6f4cc68a231d92dc7 Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 00:40:01 +0200 Subject: [PATCH 035/130] make test gen output SSZ in addition to yaml files for SSZ objects --- test_libs/pyspec/eth2spec/test/utils.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/test_libs/pyspec/eth2spec/test/utils.py b/test_libs/pyspec/eth2spec/test/utils.py index e15c5efeb..6aaf14054 100644 --- a/test_libs/pyspec/eth2spec/test/utils.py +++ b/test_libs/pyspec/eth2spec/test/utils.py @@ -1,6 +1,7 @@ from typing import Dict, Any from eth2spec.debug.encode import encode from eth2spec.utils.ssz.ssz_typing import SSZValue +from eth2spec.utils.ssz.ssz_impl import serialize def vector_test(description: str = None): @@ -35,13 +36,22 @@ def vector_test(description: str = None): continue # Try to infer the type, but keep it as-is if it's not a SSZ type or bytes. (key, value) = data - if isinstance(value, (SSZValue, bytes)): + if value is None: + continue + if isinstance(value, SSZValue): yield key, 'data', encode(value) - # TODO: add SSZ bytes as second output + yield key, 'ssz', serialize(value) + elif isinstance(value, bytes): + yield key, 'data', encode(value) + yield key, 'ssz', value elif isinstance(value, list) and all([isinstance(el, (SSZValue, bytes)) for el in value]): for i, el in enumerate(value): - yield f'{key}_{i}', 'data', encode(el) - # TODO: add SSZ bytes as second output + if isinstance(value, SSZValue): + yield f'{key}_{i}', 'data', encode(el) + yield f'{key}_{i}', 'ssz', serialize(el) + elif isinstance(value, bytes): + yield f'{key}_{i}', 'data', encode(el) + yield f'{key}_{i}', 'ssz', el yield f'{key}_count', 'meta', len(value) else: # Not a ssz value. From c329a003af23fd1b906ca80edf6a54331bcfbc0e Mon Sep 17 00:00:00 2001 From: protolambda Date: Tue, 30 Jul 2019 00:44:19 +0200 Subject: [PATCH 036/130] improve test gen logging --- test_libs/gen_helpers/gen_base/gen_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py b/test_libs/gen_helpers/gen_base/gen_runner.py index fff3a3436..f398becab 100644 --- a/test_libs/gen_helpers/gen_base/gen_runner.py +++ b/test_libs/gen_helpers/gen_base/gen_runner.py @@ -83,12 +83,13 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]): yaml = YAML(pure=True) yaml.default_flow_style = None - print(f"Generating tests into {output_dir}...") - print(f"Reading config presets and fork timelines from {args.configs_path}") + print(f"Generating tests into {output_dir}") + print(f"Reading configs from {args.configs_path}") for tprov in test_providers: # loads configuration etc. 
config_name = tprov.prepare(args.configs_path) + print(f"generating tests with config '{config_name}' ...") for test_case in tprov.make_cases(): case_dir = Path(output_dir) / Path(config_name) / Path(test_case.fork_name) \ / Path(test_case.runner_name) / Path(test_case.handler_name) \ @@ -133,4 +134,4 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]): except Exception as e: print(f"ERROR: failed to generate vector(s) for test {case_dir}: {e}") - print(f"completed {generator_name}") + print(f"completed {generator_name}") From 2dcad9a6bfcc69a8a9c6840eb33680decb354766 Mon Sep 17 00:00:00 2001 From: protolambda Date: Tue, 30 Jul 2019 02:09:25 +0200 Subject: [PATCH 037/130] add config filtering option --- test_libs/gen_helpers/gen_base/gen_runner.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py b/test_libs/gen_helpers/gen_base/gen_runner.py index f398becab..ea332e945 100644 --- a/test_libs/gen_helpers/gen_base/gen_runner.py +++ b/test_libs/gen_helpers/gen_base/gen_runner.py @@ -70,7 +70,16 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]): dest="configs_path", required=True, type=validate_configs_dir, - help="specify the path of the configs directory (containing constants_presets and fork_timelines)", + help="specify the path of the configs directory", + ) + parser.add_argument( + "-l", + "--config-list", + dest="config_list", + nargs='*', + type=str, + required=False, + help="specify configs to run with. Allows all if no config names are specified.", ) args = parser.parse_args() @@ -86,9 +95,17 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]): print(f"Generating tests into {output_dir}") print(f"Reading configs from {args.configs_path}") + configs = args.config_list + if len(configs) != 0: + print(f"Filtering test-generator runs to only include configs: {', '.join(configs)}") + for tprov in test_providers: # loads configuration etc. config_name = tprov.prepare(args.configs_path) + if len(configs) != 0 and config_name not in configs: + print(f"skipping tests with config '{config_name}' since it is filtered out") + continue + print(f"generating tests with config '{config_name}' ...") for test_case in tprov.make_cases(): case_dir = Path(output_dir) / Path(config_name) / Path(test_case.fork_name) \ From f5e404298bf639d939020daedea7c3b61c31962f Mon Sep 17 00:00:00 2001 From: protolambda Date: Tue, 30 Jul 2019 03:07:42 +0200 Subject: [PATCH 038/130] update test generator documentation --- test_generators/README.md | 154 ++++++++++++------- test_libs/gen_helpers/README.md | 47 +++++- test_libs/gen_helpers/gen_base/gen_runner.py | 3 + 3 files changed, 147 insertions(+), 57 deletions(-) diff --git a/test_generators/README.md b/test_generators/README.md index 9fdb45f4f..7a4a5c536 100644 --- a/test_generators/README.md +++ b/test_generators/README.md @@ -1,11 +1,13 @@ # Eth 2.0 Test Generators -This directory contains all the generators for YAML tests, consumed by Eth 2.0 client implementations. +This directory contains all the generators for tests, consumed by Eth 2.0 client implementations. -Any issues with the generators and/or generated tests should be filed in the repository that hosts the generator outputs, here: [ethereum/eth2.0-spec-tests](https://github.com/ethereum/eth2.0-spec-tests). 
+Any issues with the generators and/or generated tests should be filed in the repository that hosts the generator outputs,
+ here: [ethereum/eth2.0-spec-tests](https://github.com/ethereum/eth2.0-spec-tests).
 
-Whenever a release is made, the new tests are automatically built, and
-[eth2TestGenBot](https://github.com/eth2TestGenBot) commits the changes to the test repository.
+On releases, test generators are run by the release manager. Test-generation of mainnet tests can take a significant amount of time, and is better left out of a CI setup.
+
+An automated nightly test-release system, with a config filter applied, is being considered as implementation needs mature.
 
 ## How to run generators
 
@@ -58,11 +60,11 @@ It's recommended to extend the base-generator.
 Create a `requirements.txt` in the root of your generator directory:
 ```
-eth-utils==1.6.0
 ../../test_libs/gen_helpers
 ../../test_libs/config_helpers
 ../../test_libs/pyspec
 ```
+
 The config helper and pyspec are optional, but preferred. We encourage generators to derive tests from the spec itself in order to prevent code duplication and outdated tests.
 Applying configurations to the spec is simple and enables you to create test suites with different contexts.
 
@@ -73,72 +75,115 @@ Install all the necessary requirements (re-run when you add more):
 pip3 install -r requirements.txt
 ```
 
+Note that you may need `PYTHONPATH` to include the pyspec directory to run test generators manually,
+ just as when running normal tests. The makefile handles this for you already.
+
 And write your initial test generator, extending the base generator:
 
-Write a `main.py` file. See example:
+Write a `main.py` file. The shuffling test generator is a good minimal starting point:
 
 ```python
-from gen_base import gen_runner, gen_suite, gen_typing
-
-from eth_utils import (
-    to_dict, to_tuple
-)
-
+from eth2spec.phase0 import spec as spec
+from eth_utils import to_tuple
+from gen_base import gen_runner, gen_typing
 from preset_loader import loader
-from eth2spec.phase0 import spec
+from typing import Iterable
 
-@to_dict
-def example_test_case(v: int):
-    yield "spec_SHARD_COUNT", spec.SHARD_COUNT
-    yield "example", v
+
+def shuffling_case_fn(seed, count):
+    yield 'mapping', 'data', {
+        'seed': '0x' + seed.hex(),
+        'count': count,
+        'mapping': [int(spec.compute_shuffled_index(i, count, seed)) for i in range(count)]
+    }
+
+
+def shuffling_case(seed, count):
+    return f'shuffle_0x{seed.hex()}_{count}', lambda: shuffling_case_fn(seed, count)
 
 @to_tuple
-def generate_example_test_cases():
-    for i in range(10):
-        yield example_test_case(i)
+def shuffling_test_cases():
+    for seed in [spec.hash(seed_init_value.to_bytes(length=4, byteorder='little')) for seed_init_value in range(30)]:
+        for count in [0, 1, 2, 3, 5, 10, 33, 100, 1000, 9999]:
+            yield shuffling_case(seed, count)
 
-def example_minimal_suite(configs_path: str) -> gen_typing.TestSuiteOutput:
-    presets = loader.load_presets(configs_path, 'minimal')
-    spec.apply_constants_preset(presets)
+def create_provider(config_name: str) -> gen_typing.TestProvider:
 
-    return ("mini", "core", gen_suite.render_suite(
-        title="example_minimal",
-        summary="Minimal example suite, testing bar.",
-        forks_timeline="testing",
-        forks=["phase0"],
-        config="minimal",
-        handler="main",
-        test_cases=generate_example_test_cases()))
+    def prepare_fn(configs_path: str) -> str:
+        presets = loader.load_presets(configs_path, config_name)
+        spec.apply_constants_preset(presets)
+        return config_name
 
+    def cases_fn() -> Iterable[gen_typing.TestCase]:
+        for
(case_name, case_fn) in shuffling_test_cases():
+            yield gen_typing.TestCase(
+                fork_name='phase0',
+                runner_name='shuffling',
+                handler_name='core',
+                suite_name='shuffle',
+                case_name=case_name,
+                case_fn=case_fn
+            )
 
-def example_mainnet_suite(configs_path: str) -> gen_typing.TestSuiteOutput:
-    presets = loader.load_presets(configs_path, 'mainnet')
-    spec.apply_constants_preset(presets)
-
-    return ("full", "core", gen_suite.render_suite(
-        title="example_main_net",
-        summary="Main net based example suite.",
-        forks_timeline= "mainnet",
-        forks=["phase0"],
-        config="testing",
-        handler="main",
-        test_cases=generate_example_test_cases()))
+    return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn)
 
 
 if __name__ == "__main__":
-    gen_runner.run_generator("example", [example_minimal_suite, example_mainnet_suite])
+    gen_runner.run_generator("shuffling", [create_provider("minimal"), create_provider("mainnet")])
 ```
 
+This generator:
+- builds on `gen_runner.run_generator` to handle configuration / filter / output logic.
+- parametrizes the creation of a test provider to support multiple configs.
+- iterates through test cases.
+- provides a `case_fn` per test case, which `gen_runner.run_generator` only executes if the case actually needs to be generated, and skips otherwise.
+
+To extend this, one could decide to parametrize the `shuffling_test_cases` function, and create a test provider for any test-yielding function.
+
+Another example, to generate tests from pytests:
+
+```python
+def create_provider(handler_name: str, tests_src, config_name: str) -> gen_typing.TestProvider:
+
+    def prepare_fn(configs_path: str) -> str:
+        presets = loader.load_presets(configs_path, config_name)
+        spec_phase0.apply_constants_preset(presets)
+        spec_phase1.apply_constants_preset(presets)
+        return config_name
+
+    def cases_fn() -> Iterable[gen_typing.TestCase]:
+        return generate_from_tests(
+            runner_name='epoch_processing',
+            handler_name=handler_name,
+            src=tests_src,
+            fork_name='phase0'
+        )
+
+    return gen_typing.TestProvider(prepare=prepare_fn, make_cases=cases_fn)
+
+
+if __name__ == "__main__":
+    gen_runner.run_generator("epoch_processing", [
+        create_provider('crosslinks', test_process_crosslinks, 'minimal'),
+        ...
+    ])
+
+```
+
+Here the configuration is loaded for both spec phases, and the stream of test cases is derived from a pytest file using the `generate_from_tests` utility.
+
+
 Recommendations:
-- You can have more than just one suite creator, e.g. ` gen_runner.run_generator("foo", [bar_test_suite, abc_test_suite, example_test_suite])`.
-- You can concatenate lists of test cases if you don't want to split it up in suites, however, make sure they can be run with one handler.
-- You can split your suite creators into different Python files/packages; this is good for code organization.
-- Use config "minimal" for performance, but also implement a suite with the default config where necessary.
-- You may be able to write your test suite creator in a way where it does not make assumptions on constants.
-  If so, you can generate test suites with different configurations for the same scenario (see example).
-- The test-generator accepts `--output` and `--force` (overwrite output).
+- You can have more than just one test provider.
+- Your test provider is free to output any configuration and combination of runner/handler/fork/case name.
+- You can split your test case generators into different Python files/packages; this is good for code organization.
+- Use config `minimal` for performance and simplicity, but also implement a suite with the `mainnet` config where necessary.
+- You may be able to write your test case provider in a way where it does not make assumptions on constants.
+  If so, you can generate test cases with different configurations for the same scenario (see example).
+- See [`test_libs/gen_helpers/README.md`](../test_libs/gen_helpers/README.md) for command line options for generators.
+
 
 ## How to add a new test generator
 
@@ -151,11 +196,10 @@ To add a new test generator that builds `New Tests`:
 3. Your generator is assumed to have a `main.py` file in its root.
    By adding the base generator to your requirements, you can make a generator really easily. See docs below.
 4. Your generator is called with `-o some/file/path/for_testing/can/be_anything -c some/other/path/to_configs/`.
-   The base generator helps you handle this; you only have to define suite headers
-   and a list of tests for each suite you generate.
+   The base generator helps you handle this; you only have to define test case providers.
 5. Finally, add any linting or testing commands to the
-   [circleci config file](https://github.com/ethereum/eth2.0-test-generators/blob/master/.circleci/config.yml)
-   if desired to increase code quality.
+   [circleci config file](../.circleci/config.yml) if desired to increase code quality.
+   Or add it to the [`Makefile`](../Makefile), if it can be run locally.
 
 *Note*: You do not have to change the makefile.
 However, if necessary (e.g. not using Python, or mixing in other languages), submit an issue, and it can be a special case.
 
diff --git a/test_libs/gen_helpers/README.md
index 4dcfacef7..9cce48d83 100644
--- a/test_libs/gen_helpers/README.md
+++ b/test_libs/gen_helpers/README.md
@@ -1,5 +1,48 @@
 # ETH 2.0 test generator helpers
 
-`gen_base`: A util to quickly write new test suite generators with.
-See [Generators documentation](../../test_generators/README.md).
+## `gen_base`
+A util to quickly write new test suite generators with.
+
+See [Generators documentation](../../test_generators/README.md) for integration details.
+
+Options:
+
+```
+-o OUTPUT_DIR -- Output directory to write tests to. The directory must exist.
+   This directory will hold the top-level test directories (per-config directories).
+
+[-f] -- Optional. Force-run the generator: if false, existing test case folder will be detected,
+   and the test generator will not run the function to generate the test case with.
+   If true, all cases will run regardless, and files will be overwritten.
+   Other existing files are not deleted.
+
+-c CONFIGS_PATH -- The directory to load configs for pyspec from. A config is a simple key-value yaml file.
+    Use `../../configs/` when running from the root dir of a generator, and requiring the standard spec configs.
+
+[-l [CONFIG_LIST [CONFIG_LIST ...]]] -- Optional. Define which configs to run.
+    Test providers loading other configs will be ignored. If none are specified, no config will be ignored.
+```
+
+`gen_from_tests`: A util to derive tests from a tests source file.
+
+This requires the tests to yield test-case-part outputs. These outputs are then written to the test case directory.
+Yielding data is illegal in normal pytests, so it is only done when in "generator mode".
+This functionality can be attached to any function by using the `vector_test()` decorator found in `eth2spec/test/utils.py`.
+
+The yielding pattern is:
+
+2 value style: `yield <key name> <value>`.
The kind of output will be inferred from the value by the `vector_test()` decorator.
+
+3 value style: `yield <key name> <kind name> <value>`.
+
+Test part output kinds:
+- `ssz`: value is expected to be a `bytes`, and the raw data is written to a `<key name>.ssz` file.
+- `data`: value is expected to be any python object that can be dumped as YAML. Output is written to `<key name>.yaml`
+- `meta`: these key-value pairs are collected into a dict, and then collectively written to a metadata
+  file named `meta.yaml`, if anything is yielded with `meta` empty.
+
+The `vector_test()` decorator can detect pyspec SSZ types, and output them both as `data` and `ssz`, for the test consumer to choose.
+
+Note that the yielded outputs are processed before the test continues. It is safe to yield information that later mutates,
+ as the output will already be encoded to yaml or ssz bytes. This avoids the need to deep-copy the whole object.

diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py
index ea332e945..1eb6bac56 100644
--- a/test_libs/gen_helpers/gen_base/gen_runner.py
+++ b/test_libs/gen_helpers/gen_base/gen_runner.py
@@ -96,6 +96,9 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
     print(f"Reading configs from {args.configs_path}")
 
     configs = args.config_list
+    if configs is None:
+        configs = []
+
     if len(configs) != 0:
         print(f"Filtering test-generator runs to only include configs: {', '.join(configs)}")
 

From bdebfe31dfbed77a26bdca67003312435ab91e57 Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 03:09:48 +0200
Subject: [PATCH 039/130] organize test-case-part explanation better

---
 test_libs/gen_helpers/README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/test_libs/gen_helpers/README.md
index 9cce48d83..dfda434c3 100644
--- a/test_libs/gen_helpers/README.md
+++ b/test_libs/gen_helpers/README.md
@@ -24,12 +24,18 @@ Options:
     Test providers loading other configs will be ignored. If none are specified, no config will be ignored.
 ```
 
-`gen_from_tests`: A util to derive tests from a tests source file.
+## `gen_from_tests`
+
+This is a util to derive tests from a tests source file.
 
 This requires the tests to yield test-case-part outputs. These outputs are then written to the test case directory.
 Yielding data is illegal in normal pytests, so it is only done when in "generator mode".
 This functionality can be attached to any function by using the `vector_test()` decorator found in `eth2spec/test/utils.py`.
 
+## Test-case parts
+
+Test cases consist of parts, which are yielded to the base generator one by one.
+
 The yielding pattern is:
 
 2 value style: `yield <key name> <value>`.

From c91cefc76c53f3069f167e3a5786ba6dcd6687a9 Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 03:11:59 +0200
Subject: [PATCH 040/130] move bls tests to general config dir

---
 test_generators/bls/main.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test_generators/bls/main.py
index 914983f43..a74397e77 100644
--- a/test_generators/bls/main.py
+++ b/test_generators/bls/main.py
@@ -166,8 +166,9 @@ def create_provider(handler_name: str,
                     test_case_fn: Callable[[], Iterable[Tuple[str, Dict[str, Any]]]]) -> gen_typing.TestProvider:
 
     def prepare_fn(configs_path: str) -> str:
-        # Nothing to load / change in spec. Maybe in future forks. Put the tests into the minimal config category.
-        return 'minimal'
+        # Nothing to load / change in spec. Maybe in future forks.
+        # Put the tests into the general config category, to not require any particular configuration.
+        return 'general'
 
     def cases_fn() -> Iterable[gen_typing.TestCase]:
         for data in test_case_fn():

From 2ba3cc993d0fd7aa80c6d9f8d4aa1f999c2da4dc Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 03:40:25 +0200
Subject: [PATCH 041/130] update test format doc and SSZ-static format docs

---
 specs/test_formats/README.md            | 28 +++++++++---------
 specs/test_formats/ssz_static/README.md |  2 +-
 specs/test_formats/ssz_static/core.md   | 39 ++++++++++++++++++-----
 3 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/specs/test_formats/README.md
index 196315185..0b9d64fa5 100644
--- a/specs/test_formats/README.md
+++ b/specs/test_formats/README.md
@@ -49,22 +49,20 @@ Test formats:
 
 ## Glossary
 
-- `generator`: a program that outputs one or more `suite` files.
-  - A generator should only output one `type` of test.
-  - A generator is free to output multiple `suite` files, optionally with different `handler`s.
-- `type`: the specialization of one single `generator`.
-- `suite`: a YAML file with:
-  - a header: describes the `suite`, and defines what the `suite` is for
-  - a list of test cases
+- `generator`: a program that outputs one or more test-cases, each organized into a `config > runner > handler > suite` hierarchy.
+- `config`: tests are grouped by configuration used for spec presets. In addition to the standard configurations,
+  `general` may be used as a catch-all for tests not restricted to one configuration. (E.g. BLS).
+- `type`: the specialization of one single `generator`. E.g. epoch processing.
 - `runner`: where a generator is a *"producer"*, this is the *"consumer"*.
   - A `runner` focuses on *only one* `type`, and each type has *only one* `runner`.
-- `handler`: a `runner` may be too limited sometimes, you may have a `suite` with a specific focus that requires a different format.
+- `handler`: a `runner` may be too limited sometimes, you may have a set of tests with a specific focus that requires a different format.
   To facilitate this, you specify a `handler`: the runner can deal with the format by using the specified handler.
-  Using a `handler` in a `runner` is optional.
-- `case`: a test case, an entry in the `test_cases` list of a `suite`. A case can be anything in general,
-  but its format should be well-defined in the documentation corresponding to the `type` (and `handler`).\
-  A test has the same exact configuration and fork context as the other entries in the `case` list of its `suite`.
-
+- `suite`: a directory containing test cases that are coherent. Each `suite` under the same `handler` shares the same format.
+  This is an organizational/cosmetic hierarchy layer.
+- `case`: a test case, a directory in a `suite`. A case can be anything in general,
+  but its format should be well-defined in the documentation corresponding to the `type` (and `handler`).
+- `case part`: a test case consists of different files, possibly in different formats, to facilitate the specific test case format better.
+  Optionally, a `meta.yaml` is included to declare meta-data for the test, e.g. BLS requirements.
 
 ## Test format philosophy
 
@@ -121,10 +119,12 @@ The well known bls/shuffling/ssz_static/operations/epoch_processing/etc. Handler
 
 ### `<test handler name>/`
 
 Specialization within category. All suites in here will have the same test case format.
+Using a `handler` in a `runner` is optional.
A `core` (or other generic) handler may be used if the `runner` does not have different formats.
 
 ### `<test suite name>/`
 
-Suites are split up. Suite size does not change memory bounds, and makes lookups of particular tests fast to find and load.
+Suites are split up. Suite size (i.e. the amount of tests) does not change the maximum memory requirement, as test cases can be loaded one by one.
+This also makes filtered sets of tests fast and easy to load.
 
 ### `<test case>/`
 
diff --git a/specs/test_formats/ssz_static/README.md
index 1df2cb5f6..1dfe0c23f 100644
--- a/specs/test_formats/ssz_static/README.md
+++ b/specs/test_formats/ssz_static/README.md
@@ -3,6 +3,6 @@
 This set of test-suites provides static testing for SSZ:
  to instantiate just the known Eth 2.0 SSZ types from binary data.
 
-This series of tests is based on the spec-maintained `minimal_ssz.py`, i.e. fully consistent with the SSZ spec.
+This series of tests is based on the spec-maintained `eth2spec/utils/ssz/ssz_impl.py`, i.e. fully consistent with the SSZ spec.
 
 Test format documentation can be found here: [core test format](./core.md).

diff --git a/specs/test_formats/ssz_static/core.md
index f24a225b0..1816e7d4d 100644
--- a/specs/test_formats/ssz_static/core.md
+++ b/specs/test_formats/ssz_static/core.md
@@ -4,28 +4,51 @@ The goal of this type is to provide clients with a solid reference for how the k
 Each object described in the Phase 0 spec is covered.
 This is important, as many of the clients aiming to serialize/deserialize objects directly into structs/classes
 do not support (or have alternatives for) generic SSZ encoding/decoding.
+
 This test-format ensures these direct serializations are covered.
 
+Note that this test suite does not cover the invalid-encoding case:
+ SSZ implementations should be hardened against invalid inputs with the other SSZ tests as guide, along with fuzzing.
+
 ## Test case format
 
+Each SSZ type is a `handler`, since the format is semantically different: the type of the data is different.
+
+One can iterate over the handlers, and select the type based on the handler name.
+Suites are then the same format, but each specialized in one randomization mode.
+Some randomization modes may only produce a single test case (e.g. the all-zeroes case).
+
+The output parts are: `meta.yaml`, `serialized.ssz`, `value.yaml`
+
+### `meta.yaml`
+
+For a non-container SSZ type:
+
 ```yaml
-SomeObjectName: -- key, object name, formatted as in spec. E.g. "BeaconBlock".
-    value: dynamic -- the YAML-encoded value, of the type specified by type_name.
-    serialized: bytes -- string, SSZ-serialized data, hex encoded, with prefix 0x
-    root: bytes32 -- string, hash-tree-root of the value, hex encoded, with prefix 0x
-    signing_root: bytes32 -- string, signing-root of the value, hex encoded, with prefix 0x. Optional, present if type contains ``signature`` field
+root: bytes32 -- string, hash-tree-root of the value, hex encoded, with prefix 0x
+signing_root: bytes32 -- string, signing-root of the value, hex encoded, with prefix 0x.
+  Optional, present if type is a container and ends with a ``signature`` field.
 ```
 
### `serialized.ssz`
+
+The raw encoded bytes.
+
+### `value.yaml`
+
+The same value as `serialized.ssz`, represented as YAML.
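As an editorial illustration of how these parts fit together (see the `Condition` section just below), a sketch of a runner check; `deserialize`, `serialize`, and `hash_tree_root` are hypothetical stand-ins for the client's own implementations:

```python
from pathlib import Path
from ruamel.yaml import YAML

def check_ssz_static_case(case_dir: Path, deserialize, serialize, hash_tree_root) -> None:
    meta = YAML(typ='safe').load((case_dir / 'meta.yaml').read_text())
    serialized = (case_dir / 'serialized.ssz').read_bytes()
    value = deserialize(serialized)            # deserialization must succeed
    assert serialize(value) == serialized      # re-serialization round-trips
    assert hash_tree_root(value).hex() == meta['root'][2:]  # strip the '0x' prefix
```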
+ + ## Condition A test-runner can implement the following assertions: - Serialization: After parsing the `value`, SSZ-serialize it: the output should match `serialized` -- Hash-tree-root: After parsing the `value`, Hash-tree-root it: the output should match `root` - - Optionally also check signing-root, if present. +- Hash-tree-root: After parsing the `value` (or deserializing `serialized`), Hash-tree-root it: the output should match `root` + - Optionally also check `signing_root`, if present. - Deserialization: SSZ-deserialize the `serialized` value, and see if it matches the parsed `value` -## References +## References **`serialized`**—[SSZ serialization](../../simple-serialize.md#serialization) **`root`**—[hash_tree_root](../../simple-serialize.md#merkleization) function From eba473079b178d07ae76be2cbc7bd3081582702c Mon Sep 17 00:00:00 2001 From: protolambda Date: Tue, 30 Jul 2019 12:49:18 +0200 Subject: [PATCH 042/130] update makefile to support generators outputting to same config, or even same runner dir --- Makefile | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 318056689..ecd35e84e 100644 --- a/Makefile +++ b/Makefile @@ -2,17 +2,20 @@ SPEC_DIR = ./specs SCRIPT_DIR = ./scripts TEST_LIBS_DIR = ./test_libs PY_SPEC_DIR = $(TEST_LIBS_DIR)/pyspec -YAML_TEST_DIR = ./eth2.0-spec-tests/tests +TEST_VECTOR_DIR = ./eth2.0-spec-tests/tests GENERATOR_DIR = ./test_generators DEPOSIT_CONTRACT_DIR = ./deposit_contract CONFIGS_DIR = ./configs # Collect a list of generator names -GENERATORS = $(sort $(dir $(wildcard $(GENERATOR_DIR)/*/))) -# Map this list of generator paths to a list of test output paths -YAML_TEST_TARGETS = $(patsubst $(GENERATOR_DIR)/%, $(YAML_TEST_DIR)/%, $(GENERATORS)) +GENERATORS = $(sort $(dir $(wildcard $(GENERATOR_DIR)/*/.))) +# Map this list of generator paths to "gen_{generator name}" entries +GENERATOR_TARGETS = $(patsubst $(GENERATOR_DIR)/%/, gen_%, $(GENERATORS)) GENERATOR_VENVS = $(patsubst $(GENERATOR_DIR)/%, $(GENERATOR_DIR)/%venv, $(GENERATORS)) +# To check generator matching: +#$(info $$GENERATOR_TARGETS is [${GENERATOR_TARGETS}]) + PY_SPEC_PHASE_0_TARGETS = $(PY_SPEC_DIR)/eth2spec/phase0/spec.py PY_SPEC_PHASE_0_DEPS = $(SPEC_DIR)/core/0_*.md @@ -24,14 +27,14 @@ PY_SPEC_ALL_TARGETS = $(PY_SPEC_PHASE_0_TARGETS) $(PY_SPEC_PHASE_1_TARGETS) COV_HTML_OUT=.htmlcov COV_INDEX_FILE=$(PY_SPEC_DIR)/$(COV_HTML_OUT)/index.html -.PHONY: clean all test citest lint gen_yaml_tests pyspec phase0 phase1 install_test open_cov \ +.PHONY: clean partial_clean all test citest lint generate_tests pyspec phase0 phase1 install_test open_cov \ install_deposit_contract_test test_deposit_contract compile_deposit_contract -all: $(PY_SPEC_ALL_TARGETS) $(YAML_TEST_DIR) $(YAML_TEST_TARGETS) +all: $(PY_SPEC_ALL_TARGETS) # deletes everything except the venvs partial_clean: - rm -rf $(YAML_TEST_DIR) + rm -rf $(TEST_VECTOR_DIR) rm -rf $(GENERATOR_VENVS) rm -rf $(PY_SPEC_DIR)/.pytest_cache rm -rf $(PY_SPEC_ALL_TARGETS) @@ -44,8 +47,8 @@ clean: partial_clean rm -rf $(PY_SPEC_DIR)/venv rm -rf $(DEPOSIT_CONTRACT_DIR)/venv -# "make gen_yaml_tests" to run generators -gen_yaml_tests: $(PY_SPEC_ALL_TARGETS) $(YAML_TEST_TARGETS) +# "make generate_tests" to run all generators +generate_tests: $(PY_SPEC_ALL_TARGETS) $(GENERATOR_TARGETS) # installs the packages to run pyspec tests install_test: @@ -90,8 +93,8 @@ $(PY_SPEC_DIR)/eth2spec/phase1/spec.py: $(PY_SPEC_PHASE_1_DEPS) CURRENT_DIR = ${CURDIR} -# The function that 
builds a set of suite files, by calling a generator for the given type (param 1)
-define build_yaml_tests
+# Runs a generator, identified by param 1
+define run_generator
     # Started!
     # Create output directory
     # Navigate to the generator
     # Create a virtual environment, if it does not exist already
     # Activate the venv, this is where dependencies are installed for the generator
     # Install all the necessary requirements
     # Run the generator. The generator is assumed to have a "main.py" file.
     # We output to the tests dir (generator program should accept a "-o <output dir>" argument).
     echo "generator $(1) started"; \
-    mkdir -p $(YAML_TEST_DIR)$(1); \
-    cd $(GENERATOR_DIR)$(1); \
+    mkdir -p $(TEST_VECTOR_DIR); \
+    cd $(GENERATOR_DIR)/$(1); \
     if ! test -d venv; then python3 -m venv venv; fi; \
     . venv/bin/activate; \
     pip3 install -r requirements.txt; \
-    python3 main.py -o $(CURRENT_DIR)/$(YAML_TEST_DIR)$(1) -c $(CURRENT_DIR)/$(CONFIGS_DIR); \
+    python3 main.py -o $(CURRENT_DIR)/$(TEST_VECTOR_DIR) -c $(CURRENT_DIR)/$(CONFIGS_DIR); \
     echo "generator $(1) finished"
 endef
 
 # The tests dir itself is simply built by creating the directory (recursively creating deeper directories if necessary)
-$(YAML_TEST_DIR):
-	$(info creating directory, to output yaml targets to: ${YAML_TEST_TARGETS})
+$(TEST_VECTOR_DIR):
+	$(info creating test output directory, for generators: ${GENERATOR_TARGETS})
 	mkdir -p $@
-$(YAML_TEST_DIR)/:
-	$(info ignoring duplicate yaml tests dir)
+$(TEST_VECTOR_DIR)/:
+	$(info ignoring duplicate tests dir)
 
-# For any target within the tests dir, build it using the build_yaml_tests function.
+# For any generator, build it using the run_generator function.
 # (creation of output dir is a dependency)
-$(YAML_TEST_DIR)%: $(PY_SPEC_ALL_TARGETS) $(YAML_TEST_DIR)
-	$(call build_yaml_tests,$*)
+gen_%: $(PY_SPEC_ALL_TARGETS) $(TEST_VECTOR_DIR)
+	$(call run_generator,$*)

From 79f6ab575246415302338e479a62a50460d07484 Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 14:06:26 +0200
Subject: [PATCH 043/130] fix imports in genesis generator

---
 test_generators/genesis/main.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test_generators/genesis/main.py
index 6091a63d8..9a91afbfd 100644
--- a/test_generators/genesis/main.py
+++ b/test_generators/genesis/main.py
@@ -12,8 +12,7 @@ def create_provider(handler_name: str, tests_src, config_name: str) -> gen_typin
 
     def prepare_fn(configs_path: str) -> str:
         presets = loader.load_presets(configs_path, config_name)
-        spec_phase0.apply_constants_preset(presets)
-        spec_phase1.apply_constants_preset(presets)
+        spec.apply_constants_preset(presets)
         return config_name
 
     def cases_fn() -> Iterable[gen_typing.TestCase]:

From ccf472af686a11a070661668adba77450b7d2ac4 Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 14:08:25 +0200
Subject: [PATCH 044/130] remove old requirements from generators

---
 test_generators/epoch_processing/requirements.txt | 1 -
 test_generators/genesis/requirements.txt          | 1 -
 test_generators/sanity/requirements.txt           | 1 -
 3 files changed, 3 deletions(-)

diff --git a/test_generators/epoch_processing/requirements.txt
index 595cee69c..3314093d3 100644
--- a/test_generators/epoch_processing/requirements.txt
+++ b/test_generators/epoch_processing/requirements.txt
@@ -1,4 +1,3 @@
-eth-utils==1.6.0
 ../../test_libs/gen_helpers
 ../../test_libs/config_helpers
 ../../test_libs/pyspec
\ No newline at end of file
diff --git a/test_generators/genesis/requirements.txt
index 595cee69c..3314093d3 100644
---
a/test_generators/genesis/requirements.txt +++ b/test_generators/genesis/requirements.txt @@ -1,4 +1,3 @@ -eth-utils==1.6.0 ../../test_libs/gen_helpers ../../test_libs/config_helpers ../../test_libs/pyspec \ No newline at end of file diff --git a/test_generators/sanity/requirements.txt b/test_generators/sanity/requirements.txt index 595cee69c..3314093d3 100644 --- a/test_generators/sanity/requirements.txt +++ b/test_generators/sanity/requirements.txt @@ -1,4 +1,3 @@ -eth-utils==1.6.0 ../../test_libs/gen_helpers ../../test_libs/config_helpers ../../test_libs/pyspec \ No newline at end of file From 7165932012bc107fd26c69d18c6bf466c8cd6ca6 Mon Sep 17 00:00:00 2001 From: protolambda Date: Tue, 30 Jul 2019 14:08:50 +0200 Subject: [PATCH 045/130] output list-type parts correctly --- test_libs/pyspec/eth2spec/test/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_libs/pyspec/eth2spec/test/utils.py b/test_libs/pyspec/eth2spec/test/utils.py index 6aaf14054..f02e4153b 100644 --- a/test_libs/pyspec/eth2spec/test/utils.py +++ b/test_libs/pyspec/eth2spec/test/utils.py @@ -46,10 +46,10 @@ def vector_test(description: str = None): yield key, 'ssz', value elif isinstance(value, list) and all([isinstance(el, (SSZValue, bytes)) for el in value]): for i, el in enumerate(value): - if isinstance(value, SSZValue): + if isinstance(el, SSZValue): yield f'{key}_{i}', 'data', encode(el) yield f'{key}_{i}', 'ssz', serialize(el) - elif isinstance(value, bytes): + elif isinstance(el, bytes): yield f'{key}_{i}', 'data', encode(el) yield f'{key}_{i}', 'ssz', el yield f'{key}_count', 'meta', len(value) From ff2b533c40edd085bf158b9deff5e65fe66beb47 Mon Sep 17 00:00:00 2001 From: protolambda Date: Tue, 30 Jul 2019 14:09:20 +0200 Subject: [PATCH 046/130] update test format docs with new test structure --- specs/test_formats/bls/aggregate_pubkeys.md | 2 ++ specs/test_formats/bls/aggregate_sigs.md | 2 ++ .../bls/msg_hash_g2_compressed.md | 2 ++ .../bls/msg_hash_g2_uncompressed.md | 2 ++ specs/test_formats/bls/priv_to_pub.md | 2 ++ specs/test_formats/bls/sign_msg.md | 2 ++ specs/test_formats/epoch_processing/README.md | 20 +++++++++-- specs/test_formats/genesis/initialization.md | 36 +++++++++++++++---- specs/test_formats/genesis/validity.md | 18 ++++++---- specs/test_formats/operations/README.md | 31 ++++++++++++---- 10 files changed, 95 insertions(+), 22 deletions(-) diff --git a/specs/test_formats/bls/aggregate_pubkeys.md b/specs/test_formats/bls/aggregate_pubkeys.md index 43c7d6c6d..049ad6991 100644 --- a/specs/test_formats/bls/aggregate_pubkeys.md +++ b/specs/test_formats/bls/aggregate_pubkeys.md @@ -4,6 +4,8 @@ A BLS pubkey aggregation combines a series of pubkeys into a single pubkey. 
## Test case format +The test data is declared in a `data.yaml` file: + ```yaml input: List[BLS Pubkey] -- list of input BLS pubkeys output: BLS Pubkey -- expected output, single BLS pubkey diff --git a/specs/test_formats/bls/aggregate_sigs.md b/specs/test_formats/bls/aggregate_sigs.md index 6690c3344..2252dbaa8 100644 --- a/specs/test_formats/bls/aggregate_sigs.md +++ b/specs/test_formats/bls/aggregate_sigs.md @@ -4,6 +4,8 @@ A BLS signature aggregation combines a series of signatures into a single signat ## Test case format +The test data is declared in a `data.yaml` file: + ```yaml input: List[BLS Signature] -- list of input BLS signatures output: BLS Signature -- expected output, single BLS signature diff --git a/specs/test_formats/bls/msg_hash_g2_compressed.md b/specs/test_formats/bls/msg_hash_g2_compressed.md index bbc1b82fe..761e819f2 100644 --- a/specs/test_formats/bls/msg_hash_g2_compressed.md +++ b/specs/test_formats/bls/msg_hash_g2_compressed.md @@ -4,6 +4,8 @@ A BLS compressed-hash to G2. ## Test case format +The test data is declared in a `data.yaml` file: + ```yaml input: message: bytes32 diff --git a/specs/test_formats/bls/msg_hash_g2_uncompressed.md b/specs/test_formats/bls/msg_hash_g2_uncompressed.md index c79afa94c..5ee535a38 100644 --- a/specs/test_formats/bls/msg_hash_g2_uncompressed.md +++ b/specs/test_formats/bls/msg_hash_g2_uncompressed.md @@ -4,6 +4,8 @@ A BLS uncompressed-hash to G2. ## Test case format +The test data is declared in a `data.yaml` file: + ```yaml input: message: bytes32 diff --git a/specs/test_formats/bls/priv_to_pub.md b/specs/test_formats/bls/priv_to_pub.md index ef62241ae..29c6b216a 100644 --- a/specs/test_formats/bls/priv_to_pub.md +++ b/specs/test_formats/bls/priv_to_pub.md @@ -4,6 +4,8 @@ A BLS private key to public key conversion. ## Test case format +The test data is declared in a `data.yaml` file: + ```yaml input: bytes32 -- the private key output: bytes48 -- the public key diff --git a/specs/test_formats/bls/sign_msg.md b/specs/test_formats/bls/sign_msg.md index 46f9f1697..6c4f88cd1 100644 --- a/specs/test_formats/bls/sign_msg.md +++ b/specs/test_formats/bls/sign_msg.md @@ -4,6 +4,8 @@ Message signing with BLS should produce a signature. ## Test case format +The test data is declared in a `data.yaml` file: + ```yaml input: privkey: bytes32 -- the private key used for signing diff --git a/specs/test_formats/epoch_processing/README.md b/specs/test_formats/epoch_processing/README.md index dbd4ca639..437604cdf 100644 --- a/specs/test_formats/epoch_processing/README.md +++ b/specs/test_formats/epoch_processing/README.md @@ -7,13 +7,27 @@ Hence, the format is shared between each test-handler. (See test condition docum ## Test case format +### `meta.yaml` + ```yaml -description: string -- description of test case, purely for debugging purposes +description: string -- Optional description of test case, purely for debugging purposes. + Tests should use the directory name of the test case as identifier, not the description. bls_setting: int -- see general test-format spec. -pre: BeaconState -- state before running the sub-transition -post: BeaconState -- state after applying the epoch sub-transition. ``` +### `pre.yaml` + +A YAML-encoded `BeaconState`, the state before running the epoch sub-transition. + +A `pre.ssz` is also available as substitute. + + +### `post.yaml` + +A YAML-encoded `BeaconState`, the state after applying the epoch sub-transition. + +A `post.ssz` is also available as substitute. 
+
 ## Condition
 
 A handler of the `epoch_processing` test-runner should process these cases,
 
diff --git a/specs/test_formats/genesis/initialization.md
index 437dd91a3..b80729859 100644
--- a/specs/test_formats/genesis/initialization.md
+++ b/specs/test_formats/genesis/initialization.md
@@ -4,14 +4,36 @@ Tests the initialization of a genesis state based on Eth1 data.
 
 ## Test case format
 
-```yaml
-description: string -- description of test case, purely for debugging purposes
-bls_setting: int -- see general test-format spec.
-eth1_block_hash: Bytes32 -- the root of the Eth-1 block, hex encoded, with prefix 0x
-eth1_timestamp: int -- the timestamp of the block, in seconds.
-deposits: [Deposit] -- list of deposits to build the genesis state with
-state: BeaconState -- the expected genesis state.
+### `eth1_block_hash.yaml`
+
+A `Bytes32` hex encoded, with prefix 0x. The root of the Eth-1 block.
+
+An `eth1_block_hash.ssz` is available as substitute.
+
+### `eth1_timestamp.yaml`
+
+An integer. The timestamp of the block, in seconds.
+
+### `meta.yaml`
+
+A yaml file to help read the deposit count:
+
 ```
+deposits_count: int -- Amount of deposits.
+```
+
+## `deposits_<index>.yaml`
+
+A series of files, with `<index>` ranging `[0, deposit_count)`.
+Each deposit is also available as `deposits_<index>.ssz`
+
+### `state.yaml`
+
+The expected genesis state.
+
+Also available as `state.ssz`.
+
+## Processing
 
 To process this test, build a genesis state with the provided `eth1_block_hash`, `eth1_timestamp` and `deposits`:
 `initialize_beacon_state_from_eth1(eth1_block_hash, eth1_timestamp, deposits)`,
 
diff --git a/specs/test_formats/genesis/validity.md
index 792923e3a..38f2b1b1f 100644
--- a/specs/test_formats/genesis/validity.md
+++ b/specs/test_formats/genesis/validity.md
@@ -4,12 +4,18 @@ Tests if a genesis state is valid, i.e. if it counts as trigger to launch.
 
 ## Test case format
 
-```yaml
-description: string -- description of test case, purely for debugging purposes
-bls_setting: int -- see general test-format spec.
-genesis: BeaconState -- state to validate.
-is_valid: bool -- true if the genesis state is deemed valid as to launch with, false otherwise.
-```
+### `genesis.yaml`
+
+A `BeaconState`, the state to validate as genesis candidate.
+
+Also available as `genesis.ssz`.
+
+### `is_valid.yaml`
+
+A boolean, true if the genesis state is deemed valid as to launch with, false otherwise.
+
+
+## Processing
 
 To process the data, call `is_valid_genesis_state(genesis)`.
 
diff --git a/specs/test_formats/operations/README.md
index 37c5df498..7b0fca5f6 100644
--- a/specs/test_formats/operations/README.md
+++ b/specs/test_formats/operations/README.md
@@ -4,14 +4,33 @@ The different kinds of operations ("transactions") are tested individually with
 
 ## Test case format
 
+### `meta.yaml`
+
 ```yaml
-description: string -- description of test case, purely for debugging purposes
-bls_setting: int -- see general test-format spec.
-pre: BeaconState -- state before applying the operation
-<operation-name>: <operation-object> -- the YAML encoded operation, e.g. a "ProposerSlashing", or "Deposit".
-post: BeaconState -- state after applying the operation. No value if operation processing is aborted.
+description: string -- Optional description of test case, purely for debugging purposes.
+    Tests should use the directory name of the test case as identifier, not the description.
+bls_setting: int -- see general test-format spec.
``` +### `pre.yaml` + +A YAML-encoded `BeaconState`, the state before applying the operation. + +A `pre.ssz` is also available as substitute. + +### `.yaml` + +A YAML-encoded operation object, e.g. a `ProposerSlashing`, or `Deposit`. + +A `.ssz` is also available as substitute. + +### `post.yaml` + +A YAML-encoded `BeaconState`, the state after applying the operation. No value if operation processing is aborted. + +A `post.ssz` is also available as substitute. + + ## Condition A handler of the `operations` test-runner should process these cases, @@ -24,7 +43,7 @@ Operations: |-------------------------|----------------------|----------------------|--------------------------------------------------------| | `attestation` | `Attestation` | `attestation` | `process_attestation(state, attestation)` | | `attester_slashing` | `AttesterSlashing` | `attester_slashing` | `process_attester_slashing(state, attester_slashing)` | -| `block_header` | `Block` | `block` | `process_block_header(state, block)` | +| `block_header` | `Block` | **`block** | `process_block_header(state, block)` | | `deposit` | `Deposit` | `deposit` | `process_deposit(state, deposit)` | | `proposer_slashing` | `ProposerSlashing` | `proposer_slashing` | `process_proposer_slashing(state, proposer_slashing)` | | `transfer` | `Transfer` | `transfer` | `process_transfer(state, transfer)` | From 5ec941e6981771e0c5bfed46c5a5669b44bf4d2f Mon Sep 17 00:00:00 2001 From: protolambda Date: Tue, 30 Jul 2019 15:06:15 +0200 Subject: [PATCH 047/130] more documentation updates --- specs/test_formats/genesis/initialization.md | 10 +++--- specs/test_formats/sanity/blocks.md | 32 +++++++++++++++++--- specs/test_formats/sanity/slots.md | 28 ++++++++++++++--- specs/test_formats/shuffling/README.md | 16 +++++++--- 4 files changed, 69 insertions(+), 17 deletions(-) diff --git a/specs/test_formats/genesis/initialization.md b/specs/test_formats/genesis/initialization.md index b80729859..585a6f566 100644 --- a/specs/test_formats/genesis/initialization.md +++ b/specs/test_formats/genesis/initialization.md @@ -18,18 +18,20 @@ An integer. The timestamp of the block, in seconds. A yaml file to help read the deposit count: -``` +```yaml deposits_count: int -- Amount of deposits. ``` -## `deposits_.yaml` +### `deposits_.yaml` + +A series of files, with `` in range `[0, deposits_count)`. Deposits need to be processed in order. +Each file is a YAML-encoded `Deposit` object. -A series of files, with `` ranging `[0, deposit_count)`. Each deposit is also available as `deposits_.ssz` ### `state.yaml` -The expected genesis state. +The expected genesis state. A YAML-encoded `BeaconState` object. Also available as `state.ssz`. diff --git a/specs/test_formats/sanity/blocks.md b/specs/test_formats/sanity/blocks.md index 3004a6de7..1a32105a3 100644 --- a/specs/test_formats/sanity/blocks.md +++ b/specs/test_formats/sanity/blocks.md @@ -4,14 +4,38 @@ Sanity tests to cover a series of one or more blocks being processed, aiming to ## Test case format +### `meta.yaml` + ```yaml -description: string -- description of test case, purely for debugging purposes +description: string -- Optional. Description of test case, purely for debugging purposes. bls_setting: int -- see general test-format spec. -pre: BeaconState -- state before running through the transitions triggered by the blocks. -blocks: [BeaconBlock] -- blocks to process, in given order, following the main transition function (i.e. 
process slot and epoch transitions in between blocks as normal) -post: BeaconState -- state after applying all the transitions triggered by the blocks. +blocks_count: int -- the number of blocks processed in this test. ``` + +### `pre.yaml` + +A YAML-encoded `BeaconState`, the state before running the block transitions. + +A `pre.ssz` is also available as substitute. + + +### `blocks_.yaml` + +A series of files, with `` in range `[0, blocks_count)`. Blocks need to be processed in order, + following the main transition function (i.e. process slot and epoch transitions in between blocks as normal) + +Each file is a YAML-encoded `BeaconBlock`. + +Each block is also available as `blocks_.ssz` + +### `post.yaml` + +A YAML-encoded `BeaconState`, the state after applying the block transitions. + +A `post.ssz` is also available as substitute. + + ## Condition The resulting state should match the expected `post` state, or if the `post` state is left blank, diff --git a/specs/test_formats/sanity/slots.md b/specs/test_formats/sanity/slots.md index 04fecd186..c41a56c49 100644 --- a/specs/test_formats/sanity/slots.md +++ b/specs/test_formats/sanity/slots.md @@ -4,14 +4,34 @@ Sanity tests to cover a series of one or more empty-slot transitions being proce ## Test case format +### `meta.yaml` + ```yaml -description: string -- description of test case, purely for debugging purposes +description: string -- Optional. Description of test case, purely for debugging purposes. bls_setting: int -- see general test-format spec. -pre: BeaconState -- state before running through the transitions. -slots: N -- amount of slots to process, N being a positive number. -post: BeaconState -- state after applying all the transitions. ``` + +### `pre.yaml` + +A YAML-encoded `BeaconState`, the state before running the transitions. + +A `pre.ssz` is also available as substitute. + + +### `slots.yaml` + +An integer. The amount of slots to process (i.e. the difference in slots between pre and post), always a positive number. + +### `post.yaml` + +A YAML-encoded `BeaconState`, the state after applying the transitions. + +A `post.ssz` is also available as substitute. + + +### Processing + The transition with pure time, no blocks, is known as `process_slots(state, slot)` in the spec. This runs state-caching (pure slot transition) and epoch processing (every E slots). diff --git a/specs/test_formats/shuffling/README.md b/specs/test_formats/shuffling/README.md index 25074742d..24ec8c568 100644 --- a/specs/test_formats/shuffling/README.md +++ b/specs/test_formats/shuffling/README.md @@ -7,26 +7,32 @@ Clients may take different approaches to shuffling, for optimizing, and supporting advanced lookup behavior back in older history. For implementers, possible test runners implementing testing can include: -1) Just test permute-index, run it for each index `i` in `range(count)`, and check against expected `output[i]` (default spec implementation). +1) Just test permute-index, run it for each index `i` in `range(count)`, and check against expected `mapping[i]` (default spec implementation). 2) Test un-permute-index (the reverse lookup; implemented by running the shuffling rounds in reverse, from `round_count-1` to `0`). 3) Test the optimized complete shuffle, where all indices are shuffled at once; test output in one go. 4) Test complete shuffle in reverse (reverse rounds, same as #2). 
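As a concrete illustration, runner approach 1) above reduces to a few lines. A minimal sketch, where `spec` stands for the implementation under test and `seed`, `count`, and `mapping` come from the test case format described below:

```python
def check_permute_index(spec, seed: bytes, count: int, mapping) -> None:
    # Shuffle each index individually and compare it against the expected
    # lookup array from the test case.
    for i in range(count):
        assert spec.compute_shuffled_index(i, count, seed) == mapping[i]
```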
## Test case format

+### `mapping.yaml`
+
```yaml
seed: bytes32
count: int
-shuffled: List[int]
+mapping: List[int]
```

- The `bytes32` is encoded a string, hexadecimal encoding, prefixed with `0x`.
- Integers are validator indices. These are `uint64`, but realistically they are not as big.

The `count` specifies the validator registry size. One should compute the shuffling for indices `0, 1, 2, 3, ..., count (exclusive)`.
-Seed is the raw shuffling seed, passed to permute-index (or optimized shuffling approach).
+
+The `seed` is the raw shuffling seed, passed to permute-index (or optimized shuffling approach).
+
+The `mapping` is a lookup array, constructed as `[spec.compute_shuffled_index(i, count, seed) for i in range(count)]`.
+I.e. `mapping[i]` is the shuffled location of `i`.

## Condition

-The resulting list should match the expected output `shuffled` after shuffling the implied input, using the given `seed`.
-
+The resulting list should match the expected output after shuffling the implied input, using the given `seed`.
+The output is checked using the `mapping`, based on the shuffling test type (e.g. can be backwards shuffling).

From cf7d65e8ff837abfd529fa4ab0381610c7ffd021 Mon Sep 17 00:00:00 2001
From: vbuterin
Date: Tue, 30 Jul 2019 12:15:46 -0400
Subject: [PATCH 048/130] Added generalized index handling functions

---
 specs/light_client/merkle_proofs.md | 38 ++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md
index b058be7ca..f62dc8d5c 100644
--- a/specs/light_client/merkle_proofs.md
+++ b/specs/light_client/merkle_proofs.md
@@ -115,7 +115,7 @@ def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int]
    raise Exception("Only lists/vectors/containers supported")


-def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> int:
+def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> GeneralizedIndex:
    """
    Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for
    `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree.
@@ -131,6 +131,42 @@
    return root
```

+### Helpers for generalized indices
+
+#### `concat_generalized_indices`
+
+```python
+def concat_generalized_indices(*indices: GeneralizedIndex) -> GeneralizedIndex:
+    """
+    Given generalized indices i1 for A -> B, i2 for B -> C, ..., i_n for Y -> Z, returns
+    the generalized index for A -> Z.
+    """
+    o = GeneralizedIndex(1)
+    for i in indices:
+        # Append the path bits of `i` (everything below its leading bit) to `o`
+        o = o * get_previous_power_of_2(i) + (i - get_previous_power_of_2(i))
+    return o
+```
+
+#### `get_generalized_index_length`
+
+```python
+def get_generalized_index_length(index: GeneralizedIndex) -> int:
+    """
+    Returns the length of a path represented by a generalized index.
+    """
+    return int(log2(index))
+```
+
+#### `get_generalized_index_bit`
+
+```python
+def get_generalized_index_bit(index: GeneralizedIndex, bit: int) -> bool:
+    """
+    Returns the i'th bit of a generalized index.
+    """
+    return (index & (1 << bit)) > 0
+```
+
 ## Merkle multiproofs

 We define a Merkle multiproof as a minimal subset of nodes in a Merkle tree needed to fully authenticate that a set of nodes actually are part of a Merkle tree with some specified root, at a particular set of generalized indices. For example, here is the Merkle multiproof for positions 0, 1, 6 in an 8-node Merkle tree (i.e. 
generalized indices 8, 9, 14):

From 5bdcd269ea05d196d49397549b9848ce41e3278f Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 22:16:43 +0200
Subject: [PATCH 049/130] new ssz generic format + typo fix in shuffling format doc

---
 specs/test_formats/shuffling/README.md | 2 +-
 specs/test_formats/ssz_generic/README.md | 201 +++++++++++++++++++++--
 specs/test_formats/ssz_generic/uint.md | 19 ---
 3 files changed, 190 insertions(+), 32 deletions(-)
 delete mode 100644 specs/test_formats/ssz_generic/uint.md

diff --git a/specs/test_formats/shuffling/README.md b/specs/test_formats/shuffling/README.md
index 24ec8c568..a2184020b 100644
--- a/specs/test_formats/shuffling/README.md
+++ b/specs/test_formats/shuffling/README.md
@@ -22,7 +22,7 @@ count: int
 mapping: List[int]
```

-- The `bytes32` is encoded a string, hexadecimal encoding, prefixed with `0x`.
+- The `bytes32` is encoded as a string, hexadecimal encoding, prefixed with `0x`.
- Integers are validator indices. These are `uint64`, but realistically they are not as big.

The `count` specifies the validator registry size. One should compute the shuffling for indices `0, 1, 2, 3, ..., count (exclusive)`.
diff --git a/specs/test_formats/ssz_generic/README.md b/specs/test_formats/ssz_generic/README.md
index da0898087..a47d1aca8 100644
--- a/specs/test_formats/ssz_generic/README.md
+++ b/specs/test_formats/ssz_generic/README.md
@@ -1,20 +1,197 @@
# SSZ, generic tests

This set of test-suites provides general testing for SSZ:
- to instantiate any container/list/vector/other type from binary data.
+ to decode any container/list/vector/other type from binary data, encode it back, and compute the hash-tree-root.

-Since SSZ is in a development-phase, the full suite of features is not covered yet.
-Note that these tests are based on the older SSZ package.
-The tests are still relevant, but limited in scope:
- more complex object encodings have changed since the original SSZ testing.
+This test collection for general-purpose SSZ is experimental.
+The `ssz_static` suite is the required minimal support for SSZ, and should be prioritized.

-A minimal but useful series of tests covering `uint` encoding and decoding is provided.
-This is a direct port of the older SSZ `uint` tests (minus outdated test cases).
+The `ssz_generic` tests are split up into different handlers, each specialized in an SSZ type:

-Test format documentation can be found here: [uint test format](./uint.md).
+- Vectors
+  - `basic_vector`
+  - `complex_vector` *not supported yet*
+- List
+  - `basic_list` *not supported yet*
+  - `complex_list` *not supported yet*
+- Bitfields
+  - `bitvector`
+  - `bitlist`
+- Basic types
+  - `boolean`
+  - `uints`
+- Containers
+  - `containers`

-*Note*: The current Phase 0 spec does not use larger uints, and uses byte vectors (fixed length) instead to represent roots etc.
-The exact uint lengths to support may be redefined in the future.
-Extension of the SSZ tests collection is planned, with an update to the new spec-maintained `minimal_ssz.py`;
- see CI/testing issues for progress tracking.
+## Format
+
+For each type, a `valid` and a `invalid` suite is implemented.
+The cases have the same format, but those in the `invalid` suite only declare a subset of the data that a test in the `valid` suite declares.
+
+Each of the handlers encodes the SSZ type declaration in the file-name. See [Type Declarations](#type-declarations). 
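Since the type is encoded in the file name, a runner can reconstruct it at test runtime from the templates listed under [Type declarations](#type-declarations) below. A rough sketch for two of the handlers; the `basic_types` table and the `Vector`/`Bitvector` classes stand in for the client's own SSZ implementation, and any trailing `_{extra...}` debugging suffix is tolerated because only fixed name positions are read:

```python
basic_types = {'bool': boolean, 'uint8': uint8, 'uint16': uint16, 'uint32': uint32,
               'uint64': uint64, 'uint128': uint128, 'uint256': uint256}


def type_from_case_name(name: str):
    parts = name.split('_')
    if parts[0] == 'vec':     # e.g. `vec_uint16_3` -> Vector[uint16, 3]
        return Vector[basic_types[parts[1]], int(parts[2])]
    if parts[0] == 'bitvec':  # e.g. `bitvec_8` -> Bitvector[8]
        return Bitvector[int(parts[1])]
    raise ValueError('handler not covered by this sketch: ' + name)
```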
+
+### `valid`
+
+Valid has 3 parts: `meta.yaml`, `serialized.ssz`, `value.yaml`.
+
+### `meta.yaml`
+
+Valid SSZ objects can have a hash-tree-root, and for some types also a signing-root.
+The expected roots are encoded into the metadata yaml:
+
+```yaml
+root: Bytes32 -- Hash-tree-root of the object
+signing_root: Bytes32 -- Signing-root of the object
+```
+
+The `Bytes32` is encoded as a string, hexadecimal encoding, prefixed with `0x`.
+
+### `serialized.ssz`
+
+The serialized form of the object, as raw SSZ bytes.
+
+### `value.yaml`
+
+The object, encoded as a YAML structure, using the same familiar encoding as YAML data in the other test suites.
+
+### Conditions
+
+The conditions are the same for each type:
+
+- Encoding: After encoding the given `value` object, the output should match `serialized`.
+- Decoding: After decoding the given `serialized` bytes, it should match the `value` object.
+- Hash-tree-root: the root should match the root declared in the metadata.
+- Signing-root: if present in the metadata, the signing root of the object should match the declared value.
+
+### `invalid`
+
+Test cases in the `invalid` suite only include the `serialized.ssz`.
+
+#### Condition
+
+Unlike the `valid` suite, invalid encodings do not have any `value` or hash tree root.
+The `serialized` data should simply fail to decode: attempting to decode it must raise an error.
+
+Note that for some type declarations in the invalid suite, the type itself may technically be invalid.
+This is a valid way of detecting `invalid` data too. E.g. a 0-length basic vector.
+
+
+## Type declarations
+
+Most types are not as static, and reasonably be constructed during test runtime from the test case name.
+Formats are listed below.
+
+For each test case, an additional `_{extra...}` may be appended to the name,
+ where `{extra...}` contains a human-readable indication of the test case contents for debugging purposes.
+
+### `basic_vector`
+
+```
+Template:
+
+vec_{element type}_{length}
+
+Data:
+
+{element type}: bool, uint8, uint16, uint32, uint64, uint128, uint256
+
+{length}: an unsigned integer
+```
+
+
+### `bitlist`
+
+```
+Template:
+
+bitlist_{limit}
+
+Data:
+
+{limit}: the list limit, in bits, of the bitlist. Does not include the length-delimiting bit in the serialized form.
+```
+
+
+### `bitvector`
+
+```
+Template:
+
+bitvec_{length}
+
+Data:
+
+{length}: the length, in bits, of the bitvector.
+```
+
+### `boolean`
+
+A boolean has no type variations. Instead, file names just plainly describe the contents for debugging.
+
+### `uints`
+
+```
+Template:
+
+uint_{size}
+
+Data:
+
+{size}: the uint size: 8, 16, 32, 64, 128 or 256.
+```
+
+### `containers`
+
+Containers are more complicated than the other types. 
Instead, a set of pre-defined container structures is referenced:
+
+```
+Template:
+
+{container name}
+
+Data:
+
+{container name}: Any of the container names listed below (excluding the `(Container)` Python super type)
+```
+
+```python
+
+class SingleFieldTestStruct(Container):
+    A: byte
+
+
+class SmallTestStruct(Container):
+    A: uint16
+    B: uint16
+
+
+class FixedTestStruct(Container):
+    A: uint8
+    B: uint64
+    C: uint32
+
+
+class VarTestStruct(Container):
+    A: uint16
+    B: List[uint16, 1024]
+    C: uint8
+
+
+class ComplexTestStruct(Container):
+    A: uint16
+    B: List[uint16, 128]
+    C: uint8
+    D: Bytes[256]
+    E: VarTestStruct
+    F: Vector[FixedTestStruct, 4]
+    G: Vector[VarTestStruct, 2]
+
+
+class BitsStruct(Container):
+    A: Bitlist[5]
+    B: Bitvector[2]
+    C: Bitvector[1]
+    D: Bitlist[6]
+    E: Bitvector[8]
+```
diff --git a/specs/test_formats/ssz_generic/uint.md b/specs/test_formats/ssz_generic/uint.md
deleted file mode 100644
index fd7cf3221..000000000
--- a/specs/test_formats/ssz_generic/uint.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Test format: SSZ uints
-
-SSZ supports encoding of uints up to 32 bytes. These are considered to be basic types.
-
-## Test case format
-
-```yaml
-type: "uintN" -- string, where N is one of [8, 16, 32, 64, 128, 256]
-valid: bool -- expected validity of the input data
-value: string -- string, decimal encoding, to support up to 256 bit integers
-ssz: bytes -- string, input data, hex encoded, with prefix 0x
-tags: List[string] -- description of test case, in the form of a list of labels
-```
-
-## Condition
-
-Two-way testing can be implemented in the test-runner:
-- Encoding: After encoding the given input number `value`, the output should match `ssz`
-- Decoding: After decoding the given `ssz` bytes, it should match the input number `value`

From 0c5153d3f0e5a8ea6a0b8366328ed53a71f406dc Mon Sep 17 00:00:00 2001
From: protolambda
Date: Tue, 30 Jul 2019 22:17:44 +0200
Subject: [PATCH 050/130] add comment about test generation config filtering to makefile

---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index ecd35e84e..eeaed8898 100644
--- a/Makefile
+++ b/Makefile
@@ -103,6 +103,7 @@ define run_generator
 	# Install all the necessary requirements
 	# Run the generator. The generator is assumed to have an "main.py" file.
 	# We output to the tests dir (generator program should accept a "-o " argument.
+	# `-l minimal general` can be added to the generator call to filter to smaller configs, when testing.
 	echo "generator $(1) started"; \
 	mkdir -p $(TEST_VECTOR_DIR); \
 	cd $(GENERATOR_DIR)/$(1); \
From 9f0a601a405ec3f20180c59e90745cee5006738f Mon Sep 17 00:00:00 2001
From: Diederik Loerakker
Date: Wed, 31 Jul 2019 02:02:50 +0200
Subject: [PATCH 051/130] Apply suggestions from code review

Co-Authored-By: Danny Ryan

---
 specs/test_formats/genesis/initialization.md | 2 +-
 specs/test_formats/operations/README.md | 2 +-
 specs/test_formats/ssz_generic/README.md | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/specs/test_formats/genesis/initialization.md b/specs/test_formats/genesis/initialization.md
index 585a6f566..59ba6abe1 100644
--- a/specs/test_formats/genesis/initialization.md
+++ b/specs/test_formats/genesis/initialization.md
@@ -27,7 +27,7 @@ deposits_count: int -- Amount of deposits.
 A series of files, with `` in range `[0, deposits_count)`. Deposits need to be processed in order.
 Each file is a YAML-encoded `Deposit` object. 
-Each deposit is also available as `deposits_.ssz`
+Each deposit is also available as `deposits_.ssz`.


 ### `state.yaml`

diff --git a/specs/test_formats/operations/README.md b/specs/test_formats/operations/README.md
index 7b0fca5f6..7b963b520 100644
--- a/specs/test_formats/operations/README.md
+++ b/specs/test_formats/operations/README.md
@@ -43,7 +43,7 @@ Operations:
 |-------------------------|----------------------|----------------------|--------------------------------------------------------|
 | `attestation` | `Attestation` | `attestation` | `process_attestation(state, attestation)` |
 | `attester_slashing` | `AttesterSlashing` | `attester_slashing` | `process_attester_slashing(state, attester_slashing)` |
-| `block_header` | `Block` | **`block** | `process_block_header(state, block)` |
+| `block_header` | `Block` | **`block`** | `process_block_header(state, block)` |
 | `deposit` | `Deposit` | `deposit` | `process_deposit(state, deposit)` |
 | `proposer_slashing` | `ProposerSlashing` | `proposer_slashing` | `process_proposer_slashing(state, proposer_slashing)` |
 | `transfer` | `Transfer` | `transfer` | `process_transfer(state, transfer)` |
diff --git a/specs/test_formats/ssz_generic/README.md b/specs/test_formats/ssz_generic/README.md
index a47d1aca8..2096dae7d 100644
--- a/specs/test_formats/ssz_generic/README.md
+++ b/specs/test_formats/ssz_generic/README.md
@@ -26,7 +26,7 @@ The `ssz_generic` tests are split up into different handlers, each specialized in
 ## Format

-For each type, a `valid` and a `invalid` suite is implemented.
+For each type, a `valid` and an `invalid` suite is implemented.
 The cases have the same format, but those in the `invalid` suite only declare a subset of the data that a test in the `valid` suite declares.

@@ -79,7 +79,7 @@ This is a valid way of detecting `invalid` data too. E.g. a 0-length basic vecto
 ## Type declarations

-Most types are not as static, and reasonably be constructed during test runtime from the test case name.
+Most types are not as static, and can reasonably be constructed during test runtime from the test case name.
 Formats are listed below.

From d0985dbb5b6059c0e14bcc6cf78b0745c369537f Mon Sep 17 00:00:00 2001
From: Diederik Loerakker
Date: Wed, 31 Jul 2019 02:05:52 +0200
Subject: [PATCH 052/130] Apply suggestions from code review

Co-Authored-By: Danny Ryan

---
 specs/test_formats/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/specs/test_formats/README.md b/specs/test_formats/README.md
index 0b9d64fa5..aaf636d2c 100644
--- a/specs/test_formats/README.md
+++ b/specs/test_formats/README.md
@@ -175,11 +175,11 @@ Separation of configuration and tests aims to:
 - Prevent duplication of configuration
 - Make all tests easy to upgrade (e.g. when a new config constant is introduced)
 - Clearly define which constants to use
-- Shareable between clients, for cross-client short- or long-lived testnets
+- Be easily shareable between clients, for cross-client short- or long-lived testnets
-- Minimize the amounts of different constants permutations to compile as a client.
+- Minimize the amount of different constants permutations to compile as a client.

 *Note*: Some clients prefer compile-time constants and optimizations.
 They should compile for each configuration once, and run the corresponding tests per build target. 
-- Includes constants to coordinate forking with. +- Include constants to coordinate forking with The format is described in [`/configs`](../../configs/README.md#format). From 18fc4edfd48fa7a3d0947e2d4c5a51da03aa9f7f Mon Sep 17 00:00:00 2001 From: protolambda Date: Wed, 31 Jul 2019 02:16:41 +0200 Subject: [PATCH 053/130] reword to 'also available as .ssz' --- specs/test_formats/epoch_processing/README.md | 4 ++-- specs/test_formats/genesis/initialization.md | 2 +- specs/test_formats/operations/README.md | 6 +++--- specs/test_formats/sanity/blocks.md | 4 ++-- specs/test_formats/sanity/slots.md | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/specs/test_formats/epoch_processing/README.md b/specs/test_formats/epoch_processing/README.md index 437604cdf..d5b5e2c6d 100644 --- a/specs/test_formats/epoch_processing/README.md +++ b/specs/test_formats/epoch_processing/README.md @@ -19,14 +19,14 @@ bls_setting: int -- see general test-format spec. A YAML-encoded `BeaconState`, the state before running the epoch sub-transition. -A `pre.ssz` is also available as substitute. +Also available as `pre.ssz`. ### `post.yaml` A YAML-encoded `BeaconState`, the state after applying the epoch sub-transition. -A `post.ssz` is also available as substitute. +Also available as `post.ssz`. ## Condition diff --git a/specs/test_formats/genesis/initialization.md b/specs/test_formats/genesis/initialization.md index 59ba6abe1..17c87f66e 100644 --- a/specs/test_formats/genesis/initialization.md +++ b/specs/test_formats/genesis/initialization.md @@ -8,7 +8,7 @@ Tests the initialization of a genesis state based on Eth1 data. A `Bytes32` hex encoded, with prefix 0x. The root of the Eth-1 block. -A `eth1_block_hash.ssz` is available as substitute. +Also available as `eth1_block_hash.ssz`. ### `eth1_timestamp.yaml` diff --git a/specs/test_formats/operations/README.md b/specs/test_formats/operations/README.md index 7b963b520..15c6b838f 100644 --- a/specs/test_formats/operations/README.md +++ b/specs/test_formats/operations/README.md @@ -16,19 +16,19 @@ bls_setting: int -- see general test-format spec. A YAML-encoded `BeaconState`, the state before applying the operation. -A `pre.ssz` is also available as substitute. +Also available as `pre.ssz`. ### `.yaml` A YAML-encoded operation object, e.g. a `ProposerSlashing`, or `Deposit`. -A `.ssz` is also available as substitute. +Also available as `.ssz`. ### `post.yaml` A YAML-encoded `BeaconState`, the state after applying the operation. No value if operation processing is aborted. -A `post.ssz` is also available as substitute. +Also available as `post.ssz`. ## Condition diff --git a/specs/test_formats/sanity/blocks.md b/specs/test_formats/sanity/blocks.md index 1a32105a3..2b50d19ca 100644 --- a/specs/test_formats/sanity/blocks.md +++ b/specs/test_formats/sanity/blocks.md @@ -17,7 +17,7 @@ blocks_count: int -- the number of blocks processed in this test. A YAML-encoded `BeaconState`, the state before running the block transitions. -A `pre.ssz` is also available as substitute. +Also available as `pre.ssz`. ### `blocks_.yaml` @@ -33,7 +33,7 @@ Each block is also available as `blocks_.ssz` A YAML-encoded `BeaconState`, the state after applying the block transitions. -A `post.ssz` is also available as substitute. +Also available as `post.ssz`. 
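Put together, a handler for this format can be sketched as follows; `pre`, `blocks`, and `expected_post` stand for the decoded test inputs (with `expected_post = None` when no post-state file is present):

```python
def run_sanity_blocks_case(spec, pre, blocks, expected_post) -> None:
    state = pre
    try:
        # Apply every block through the full transition function, in order;
        # slot and epoch transitions between blocks happen inside it.
        for block in blocks:
            spec.state_transition(state, block, validate_state_root=True)
    except Exception:
        # A rejected block is the expected outcome exactly when the test
        # declares no post-state.
        assert expected_post is None
        return
    assert spec.hash_tree_root(state) == spec.hash_tree_root(expected_post)
```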
## Condition diff --git a/specs/test_formats/sanity/slots.md b/specs/test_formats/sanity/slots.md index c41a56c49..353287ee2 100644 --- a/specs/test_formats/sanity/slots.md +++ b/specs/test_formats/sanity/slots.md @@ -16,7 +16,7 @@ bls_setting: int -- see general test-format spec. A YAML-encoded `BeaconState`, the state before running the transitions. -A `pre.ssz` is also available as substitute. +Also available as `pre.ssz`. ### `slots.yaml` @@ -27,7 +27,7 @@ An integer. The amount of slots to process (i.e. the difference in slots between A YAML-encoded `BeaconState`, the state after applying the transitions. -A `post.ssz` is also available as substitute. +Also available as `post.ssz`. ### Processing From 058e63654d41df84a3e7bffd7bda3e6967fd11e0 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Wed, 31 Jul 2019 17:44:33 +0800 Subject: [PATCH 054/130] Fix typo --- specs/core/1_shard-data-chains.md | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 1c94741d8..d9eff358e 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -22,21 +22,24 @@ - [`ShardBlockSignatures`](#shardblocksignatures) - [`ShardBlockCore`](#shardblockcore) - [`ExtendedShardBlockCore`](#extendedshardblockcore) + - [`ShardState`](#shardstate) + - [`ShardReceiptDelta`](#shardreceiptdelta) - [Helper functions](#helper-functions) - - [`compute_epoch_of_shard_slot`](#compute_epoch_of_shard_slot) - [`compute_slot_of_shard_slot`](#compute_slot_of_shard_slot) + - [`compute_epoch_of_shard_slot`](#compute_epoch_of_shard_slot) - [`get_shard_period_start_epoch`](#get_shard_period_start_epoch) - [`get_period_committee`](#get_period_committee) - [`get_persistent_committee`](#get_persistent_committee) - [`get_shard_block_proposer_index`](#get_shard_block_proposer_index) - - [`get_shard_block_attester_committee`](#get_shard_block_attester_committee) - [`get_shard_header`](#get_shard_header) - [`pad`](#pad) - [`flatten_shard_header`](#flatten_shard_header) - [`compute_crosslink_data_root`](#compute_crosslink_data_root) - [`get_default_shard_state`](#get_default_shard_state) - [Object validity](#object-validity) - - [Shard blocks](#shard-blocks) + - [Shard block validation: preliminary](#shard-block-validation-preliminary) + - [Shard state transition function helpers](#shard-state-transition-function-helpers) + - [Shard state transition function](#shard-state-transition-function) - [Beacon attestations](#beacon-attestations) - [Shard fork choice rule](#shard-fork-choice-rule) @@ -332,8 +335,8 @@ def compute_crosslink_data_root(blocks: Sequence[ShardBlock]) -> Hash: ```python def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardState: - earlier_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD * 2, shard) - later_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD, shard) + earlier_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD * 2, shard) + later_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD, shard) return ShardState( basefee=1, shard=shard, @@ -360,7 +363,7 @@ Note that these acceptance conditions depend on the canonical beacon chain; when ### Shard 
state transition function helpers ```python -def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: uint): +def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: Gwei) -> None: epoch = compute_epoch_of_shard_slot(state.slot) earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard) later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard) @@ -373,7 +376,7 @@ def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorInd ``` ```python -def add_fee(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: uint): +def add_fee(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: Gwei) -> None: epoch = compute_epoch_of_shard_slot(state.slot) earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard) later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard) @@ -388,7 +391,7 @@ def add_fee(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, ### Shard state transition function ```python -def shard_state_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock): +def shard_state_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock) -> None: assert block.core.slot > state.slot for slot in range(state.slot, block.core.slot): shard_slot_transition(state, beacon_state) @@ -396,7 +399,7 @@ def shard_state_transition(state: ShardState, beacon_state: BeaconState, block: ``` ```python -def shard_slot_transition(state: ShardState, beacon_state: BeaconState): +def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: # Correct saved state root if state.most_recent_block_core.state_root == ZERO_HASH: state.most_recent_block_core.state_root = hash_tree_root(state) @@ -408,7 +411,7 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState): state.history_acc[depth] = h # Period transitions - if (state.slot + 1) % (SHARD_SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD) == 0: + if (state.slot + 1) % (SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD) == 0: epoch = compute_epoch_of_shard_slot(state.slot) earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard) later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard) @@ -426,7 +429,7 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState): ``` ```python -def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock): +def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock) -> None: # Check slot number assert candidate.core.slot == state.slot @@ -472,12 +475,12 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: # Process and update block data fees add_fee(state, beacon_state, proposer_index, state.basefee * len(block.core.data) // SHARD_BLOCK_SIZE_LIMIT) - QUOTIENT = SHARD_BLOCK_SIZE_LIMIT * BASEFEE_ADJUSTMENT_FACTOR) + QUOTIENT = SHARD_BLOCK_SIZE_LIMIT * BASEFEE_ADJUSTMENT_FACTOR if len(block.core.data) > SHARD_BLOCK_SIZE_TARGET: state.basefee += min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT) elif len(block.core.data) < 
SHARD_BLOCK_SIZE_TARGET: state.basefee -= min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT) - state.basefee = max(1, min(EFFECTIVE_BALANCE_INCREMENT // EPOCHS_PER_SHARD_PERIOD // SHARD_SLOTS_PER_EPOCH, state.basefee)) + state.basefee = max(1, min(EFFECTIVE_BALANCE_INCREMENT // EPOCHS_PER_SHARD_PERIOD // SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH, state.basefee)) # Check total bytes assert block.core.total_bytes == state.most_recent_block_core.total_bytes + len(block.core.data) From f263b718759e6e94c9c2be5dfc3b3df2945083fa Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Wed, 31 Jul 2019 17:50:55 +0800 Subject: [PATCH 055/130] ZERO_HASH -> Hash() --- specs/core/1_shard-data-chains.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index d9eff358e..4d4ab3897 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -354,7 +354,7 @@ def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardSta Accept a shard block `block` only if all of the following are correct: -* Either `block.core.parent_root == ZERO_HASH` or a block `parent` such that `hash_tree_root(parent.core) == block.core.parent_root` has already been accepted. +* Either `block.core.parent_root == Hash()` or a block `parent` such that `hash_tree_root(parent.core) == block.core.parent_root` has already been accepted. * `block.core.beacon_chain_root == get_block_root(head_beacon_state, compute_epoch_of_shard_slot(parent.core.slot))` where `head_beacon_state` is the current beacon chain head state. Alternatively phrased, a beacon chain block `beacon_ref` such that `signing_root(beacon_ref) == block.core.beacon_chain_root` has already been accepted and is part of the canonical chain, and no block with slot `beacon_ref.slot < slot <= compute_start_slot_of_epoch(compute_epoch_of_shard_slot(parent.core.slot))` is part of the canonical chain. * Let `beacon_state` be the state where `beacon_ref.state_root == hash_tree_root(beacon_state)`. Let `prev_state` be the post-state of the `parent` if the `parent` exists, otherwise let it be `get_default_shard_state(beacon_state, shard)` (defined below). `block.core.state_root` must equal the `hash_tree_root` of the state after applying `shard_state_transition(prev_state, beacon_state, block)`. 
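Stated as code, the three acceptance conditions above amount to roughly the following check. This is a non-normative sketch: `store` is a hypothetical structure tracking accepted shard blocks, their post-states, and the canonical beacon chain, and the chain-start case (`parent_root == Hash()`) is abbreviated:

```python
def is_acceptable_shard_block(store, shard: Shard, block: ShardBlock) -> bool:
    # Condition 1: the parent must already be accepted (or the block starts the shard chain).
    parent = store.shard_block_by_root(block.core.parent_root)  # None if unknown
    if block.core.parent_root != Hash() and parent is None:
        return False
    # Condition 2: the referenced beacon block must be canonical at the epoch of the parent's slot.
    head_beacon_state = store.head_beacon_state()
    if block.core.beacon_chain_root != get_block_root(
            head_beacon_state, compute_epoch_of_shard_slot(parent.core.slot)):
        return False
    # Condition 3: re-run the shard transition from the parent's post-state
    # (on a copy) and compare the resulting state root.
    beacon_state = store.post_state_of_beacon_block(block.core.beacon_chain_root)
    prev_state = store.shard_post_state(parent).copy() if parent is not None \
        else get_default_shard_state(beacon_state, shard)
    shard_state_transition(prev_state, beacon_state, block)
    return block.core.state_root == hash_tree_root(prev_state)
```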
@@ -401,7 +401,7 @@ def shard_state_transition(state: ShardState, beacon_state: BeaconState, block: ```python def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: # Correct saved state root - if state.most_recent_block_core.state_root == ZERO_HASH: + if state.most_recent_block_core.state_root == Hash(): state.most_recent_block_core.state_root = hash_tree_root(state) # Save states in history accumulator @@ -424,7 +424,7 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: state.later_committee_rewards = [REWARD_COEFFICIENT_BASE for _ in range(len(later_committee))], state.later_committee_fees = [0 for _ in range(len(later_committee))], else: - state.receipt_root = ZERO_HASH + state.receipt_root = Hash() state.slot += 1 ``` @@ -491,7 +491,7 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: beacon_chain_root=block.core.beacon_chain_root, parent_root=block.core.parent_root, data_root=block.core.data_root, - state_root=ZERO_HASH, + state_root=Hash(), total_bytes=block.core.total_bytes, attester_bitfield=block.core.attester_bitfield ) From fe2adfa0e231519adc60fb3c8fc2e89abcecc11e Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Wed, 31 Jul 2019 18:18:07 +0800 Subject: [PATCH 056/130] Fix many typos and lint errors --- specs/core/1_shard-data-chains.md | 102 +++++++++++++++++++----------- 1 file changed, 65 insertions(+), 37 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 4d4ab3897..bc8a8817f 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -335,8 +335,16 @@ def compute_crosslink_data_root(blocks: Sequence[ShardBlock]) -> Hash: ```python def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardState: - earlier_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD * 2, shard) - later_committee = get_period_committee(beacon_state, PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD, shard) + earlier_committee = get_period_committee( + beacon_state, + Epoch(PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD * 2), + shard, + ) + later_committee = get_period_committee( + beacon_state, + Epoch(PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD), + shard, + ) return ShardState( basefee=1, shard=shard, @@ -344,7 +352,7 @@ def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardSta earlier_committee_rewards=[REWARD_COEFFICIENT_BASE for _ in range(len(earlier_committee))], later_committee_rewards=[REWARD_COEFFICIENT_BASE for _ in range(len(later_committee))], earlier_committee_fees=[0 for _ in range(len(earlier_committee))], - later_committee_fees=[0 for _ in range(len(later_committee))], + later_committee_fees=[0 for _ in range(len(later_committee))], ) ``` @@ -365,7 +373,11 @@ Note that these acceptance conditions depend on the canonical beacon chain; when ```python def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: Gwei) -> None: epoch = compute_epoch_of_shard_slot(state.slot) - earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard) + earlier_committee = get_period_committee( + beacon_state, + get_shard_period_start_epoch(epoch, lookback=2), + state.shard, + ) 
later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard) if index in earlier_committee: state.earlier_committee_rewards[earlier_committee.index(index)] += delta @@ -403,21 +415,29 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: # Correct saved state root if state.most_recent_block_core.state_root == Hash(): state.most_recent_block_core.state_root = hash_tree_root(state) - + # Save states in history accumulator depth = 0 h = hash_tree_root(state) while state.slot % 2**depth == 0: state.history_acc[depth] = h - + # Period transitions if (state.slot + 1) % (SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD) == 0: epoch = compute_epoch_of_shard_slot(state.slot) - earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard) - later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard) + earlier_committee = get_period_committee( + beacon_state, + get_shard_period_start_epoch(epoch, lookback=2), + state.shard, + ) + later_committee = get_period_committee( + beacon_state, + get_shard_period_start_epoch(epoch, lookback=1), + state.shard, + ) state.receipt_root = hash_tree_root(List[ShardReceiptDelta, PLACEHOLDER]([ ShardReceiptDelta(index, state.earlier_committee_rewards[i], state.earlier_committee_fees[i]) - for i, index in enumerate(committee) + for i, index in enumerate(earlier_committee) ])) state.earlier_committee_rewards = state.later_committee_rewards state.earlier_committee_fees = state.later_committee_fees @@ -425,66 +445,74 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: state.later_committee_fees = [0 for _ in range(len(later_committee))], else: state.receipt_root = Hash() - state.slot += 1 + state.slot += ShardSlot(1) ``` ```python def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock) -> None: # Check slot number - assert candidate.core.slot == state.slot - + assert block.core.slot == state.slot + # Check parent block - if candidate.core.parent_root != Hash(): - assert candidate.core.parent_root == hash_tree_root(state.most_recent_block_core) - + if block.core.parent_root != Hash(): + assert block.core.parent_root == hash_tree_root(state.most_recent_block_core) + # Calculate base reward total_balance = get_total_active_balance(beacon_state) - base_reward = Gwei(REWARD_COEFFICIENT_BASE * BASE_REWARD_FACTOR // integer_squareroot(total_balance) // BASE_REWARDS_PER_EPOCH) - + base_reward = Gwei( + REWARD_COEFFICIENT_BASE * BASE_REWARD_FACTOR // integer_squareroot(total_balance) // BASE_REWARDS_PER_EPOCH + ) + # Check attestations - attester_committee = get_persistent_committee(beacon_state, shard, block.core.slot) + attester_committee = get_persistent_committee(beacon_state, state.shard, block.core.slot) pubkeys = [] attestations = 0 - + for i, index in enumerate(attester_committee): if block.core.attester_bitfield[i]: pubkeys.append(beacon_state.validators[index].pubkey) add_reward(state, beacon_state, index, base_reward) attestations += 1 - + for i in range(len(attester_committee), MAX_PERSISTENT_COMMITTEE_SIZE): assert block.attester_bitfield[i] is False - + assert bls_verify( pubkey=bls_aggregate_pubkeys(pubkeys), - message_hash=candidate.core.parent_root, - signature=candidate.signatures.attestation_signature, - domain=get_domain(beacon_state, DOMAIN_SHARD_ATTESTER, 
compute_epoch_of_shard_slot(candidate.core.slot)) + message_hash=block.core.parent_root, + signature=block.signatures.attestation_signature, + domain=get_domain(beacon_state, DOMAIN_SHARD_ATTESTER, compute_epoch_of_shard_slot(block.core.slot)) ) # Check proposer - proposer_index = get_shard_block_proposer_index(beacon_state, shard, candidate.core.slot) + proposer_index = get_shard_block_proposer_index(beacon_state, state.shard, block.core.slot) assert proposer_index is not None - add_reward(state, beacon_state, proposer_index, attestations * base_reward // PROPOSER_REWARD_QUOTIENT) + add_reward(state, beacon_state, proposer_index, Gwei(attestations * base_reward // PROPOSER_REWARD_QUOTIENT)) assert bls_verify( pubkey=beacon_state.validators[proposer_index].pubkey, - message_hash=hash_tree_root(candidate.core), - signature=candidate.signatures.proposer_signature, - domain=get_domain(beacon_state, DOMAIN_SHARD_PROPOSER, compute_epoch_of_shard_slot(candidate.core.slot)), + message_hash=hash_tree_root(block.core), + signature=block.signatures.proposer_signature, + domain=get_domain(beacon_state, DOMAIN_SHARD_PROPOSER, compute_epoch_of_shard_slot(block.core.slot)), ) - + # Process and update block data fees - add_fee(state, beacon_state, proposer_index, state.basefee * len(block.core.data) // SHARD_BLOCK_SIZE_LIMIT) + add_fee(state, beacon_state, proposer_index, Gwei(state.basefee * len(block.core.data) // SHARD_BLOCK_SIZE_LIMIT)) QUOTIENT = SHARD_BLOCK_SIZE_LIMIT * BASEFEE_ADJUSTMENT_FACTOR if len(block.core.data) > SHARD_BLOCK_SIZE_TARGET: - state.basefee += min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT) + state.basefee += Gwei(min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)) elif len(block.core.data) < SHARD_BLOCK_SIZE_TARGET: - state.basefee -= min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT) - state.basefee = max(1, min(EFFECTIVE_BALANCE_INCREMENT // EPOCHS_PER_SHARD_PERIOD // SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH, state.basefee)) - + state.basefee -= Gwei(min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)) + state.basefee = Gwei(max( + 1, + min( + EFFECTIVE_BALANCE_INCREMENT // EPOCHS_PER_SHARD_PERIOD // SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH, + state.basefee, + ) + )) + # Check total bytes assert block.core.total_bytes == state.most_recent_block_core.total_bytes + len(block.core.data) - + # Update in-state block header state.most_recent_block_core = ShardBlockCore( slot=block.core.slot, @@ -495,7 +523,7 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: total_bytes=block.core.total_bytes, attester_bitfield=block.core.attester_bitfield ) - + # Check state root assert hash_tree_root(state) == block.core.state_root ``` From 13d6a31c5cf26e7fdbb22fc56dd2a7fe80db31e6 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 1 Aug 2019 00:27:31 +0800 Subject: [PATCH 057/130] misc fix --- specs/core/1_shard-data-chains.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index bc8a8817f..0fcbfe991 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -282,9 +282,9 @@ def get_shard_header(block: ShardBlock) -> ShardBlockHeader: data_root=hash_tree_root(block.core.data), state_root=block.core.state_root, total_bytes=block.core.total_bytes, - 
attester_bitfield=block.core.attester_bitfield + attester_bitfield=block.core.attester_bitfield, ), - signatures=block.signatures + signatures=block.signatures, ) ``` @@ -475,7 +475,7 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: attestations += 1 for i in range(len(attester_committee), MAX_PERSISTENT_COMMITTEE_SIZE): - assert block.attester_bitfield[i] is False + assert block.core.attester_bitfield[i] is False or block.core.attester_bitfield[i] == 0 # TODO: FIX Bitvector assert bls_verify( pubkey=bls_aggregate_pubkeys(pubkeys), @@ -521,7 +521,7 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: data_root=block.core.data_root, state_root=Hash(), total_bytes=block.core.total_bytes, - attester_bitfield=block.core.attester_bitfield + attester_bitfield=block.core.attester_bitfield, ) # Check state root From e08c365e2fa3f8a8200258a59230b2b6b37f9653 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 31 Jul 2019 17:25:55 -0400 Subject: [PATCH 058/130] Update specs/core/1_shard-data-chains.md Co-Authored-By: Hsiao-Wei Wang --- specs/core/1_shard-data-chains.md | 1 + 1 file changed, 1 insertion(+) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 0fcbfe991..266e46d63 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -421,6 +421,7 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: h = hash_tree_root(state) while state.slot % 2**depth == 0: state.history_acc[depth] = h + depth += 1 # Period transitions if (state.slot + 1) % (SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD) == 0: From b57aed2380f8840cc96e7884f7eb4bd31c3d3d04 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 31 Jul 2019 21:26:47 -0400 Subject: [PATCH 059/130] A few fixes --- specs/core/1_shard-data-chains.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 266e46d63..3f54390d6 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -199,7 +199,7 @@ def compute_epoch_of_shard_slot(slot: ShardSlot) -> Epoch: ### `get_shard_period_start_epoch` ```python -def get_shard_period_start_epoch(epoch: Epoch, lookback: uint64=0) -> Epoch: +def get_shard_period_start_epoch(epoch: Epoch, lookback: int=0) -> Epoch: return Epoch(epoch - (epoch % EPOCHS_PER_SHARD_PERIOD) - lookback * EPOCHS_PER_SHARD_PERIOD) ``` @@ -337,12 +337,12 @@ def compute_crosslink_data_root(blocks: Sequence[ShardBlock]) -> Hash: def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardState: earlier_committee = get_period_committee( beacon_state, - Epoch(PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD * 2), + PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD * 2, shard, ) later_committee = get_period_committee( beacon_state, - Epoch(PHASE_1_FORK_SLOT - SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH * EPOCHS_PER_SHARD_PERIOD), + PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD * 2, shard, ) return ShardState( @@ -519,7 +519,7 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: slot=block.core.slot, beacon_chain_root=block.core.beacon_chain_root, parent_root=block.core.parent_root, - data_root=block.core.data_root, + data_root=hash_tree_root(block.core.data), state_root=Hash(), total_bytes=block.core.total_bytes, attester_bitfield=block.core.attester_bitfield, 
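An aside on [PATCH 058/130] above: the added `depth += 1` matters because without it the history-accumulator loop in `shard_slot_transition` never terminates. With it, a slot `s > 0` refreshes exactly `trailing_zeros(s) + 1` accumulator entries, so the entry at depth `d` is overwritten once every `2**d` slots and the accumulator stays a compact log-depth summary of history. A quick plain-Python check of that claim (the helper name is made up for illustration):

```python
def updated_depths(slot: int) -> list:
    # Mirrors the fixed loop: collect the depths whose entry is overwritten at this slot.
    depth, out = 0, []
    while slot % 2**depth == 0:
        out.append(depth)
        depth += 1
    return out


assert updated_depths(12) == [0, 1, 2]  # 12 = 0b1100: two trailing zero bits
assert updated_depths(7) == [0]         # odd slots only refresh depth 0
```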
From 52705f6fab88bfef04dfbb0686830111c8e2fe6b Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 31 Jul 2019 21:28:07 -0400 Subject: [PATCH 060/130] Quick fix --- specs/core/1_shard-data-chains.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 3f54390d6..5348322c4 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -500,9 +500,9 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: add_fee(state, beacon_state, proposer_index, Gwei(state.basefee * len(block.core.data) // SHARD_BLOCK_SIZE_LIMIT)) QUOTIENT = SHARD_BLOCK_SIZE_LIMIT * BASEFEE_ADJUSTMENT_FACTOR if len(block.core.data) > SHARD_BLOCK_SIZE_TARGET: - state.basefee += Gwei(min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)) + state.basefee += Gwei(max(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)) elif len(block.core.data) < SHARD_BLOCK_SIZE_TARGET: - state.basefee -= Gwei(min(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)) + state.basefee -= Gwei(max(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)) state.basefee = Gwei(max( 1, min( From 7a4a136d6cecc74b57fc0aed7166428ad5a6c674 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 1 Aug 2019 13:32:37 +0800 Subject: [PATCH 061/130] Fix `later_committee` --- specs/core/1_shard-data-chains.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 5348322c4..7286692cb 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -342,7 +342,7 @@ def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardSta ) later_committee = get_period_committee( beacon_state, - PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD * 2, + PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD, shard, ) return ShardState( From ce3df38028da15f982980bda1170644f00f22396 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 1 Aug 2019 14:17:05 +0800 Subject: [PATCH 062/130] Some updates: 1. Clean up configurations 2. Add `HISTORY_ACCUMULATOR_VECTOR` 3. Add `validate_state_root` flag in `shard_state_transition` for testing 4. 
Rename `history_acc` to `history_accumulator` --- specs/core/1_shard-data-chains.md | 47 ++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 7286692cb..0ef54570a 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -14,6 +14,8 @@ - [Misc](#misc) - [Initial values](#initial-values) - [Time parameters](#time-parameters) + - [State list lengths](#state-list-lengths) + - [Rewards and penalties](#rewards-and-penalties) - [Signature domain types](#signature-domain-types) - [TODO PLACEHOLDER](#todo-placeholder) - [Data structures](#data-structures) @@ -63,13 +65,11 @@ We define the following Python custom types for type hinting and readability: | Name | Value | | - | - | -| `SHARD_HEADER_SIZE` | `2**9` (= 512) | -| `SHARD_BLOCK_SIZE_LIMIT` | `2**16` (= 65,536) | -| `SHARD_BLOCK_SIZE_TARGET` | `2**14` (= 16,384) | | `SHARD_SLOTS_PER_BEACON_SLOT` | `2**1` (= 2) | | `MAX_PERSISTENT_COMMITTEE_SIZE` | `2**7` (= 128) | -| `REWARD_COEFFICIENT_BASE` | `2**20` ( = 1,048,576) | -| `BASEFEE_ADJUSTMENT_FACTOR` | `2**3` (= 8) | +| `SHARD_HEADER_SIZE` | `2**9` (= 512) | +| `SHARD_BLOCK_SIZE_TARGET` | `2**14` (= 16,384) | +| `SHARD_BLOCK_SIZE_LIMIT` | `2**16` (= 65,536) | ### Initial values @@ -77,7 +77,6 @@ We define the following Python custom types for type hinting and readability: | - | - | | `PHASE_1_FORK_EPOCH` | **TBD** | | `PHASE_1_FORK_SLOT` | **TBD** | -| `GENESIS_SHARD_SLOT` | 0 | ### Time parameters @@ -86,6 +85,19 @@ We define the following Python custom types for type hinting and readability: | `CROSSLINK_LOOKBACK` | `2**0` (= 1) | epochs | 6.4 minutes | | `EPOCHS_PER_SHARD_PERIOD` | `2**8` (= 256) | epochs | ~27 hours | +### State list lengths + +| Name | Value | Unit | +| - | - | :-: | +| `HISTORY_ACCUMULATOR_VECTOR` | `2**6` (= 64) | state tree maximum depth | + +### Rewards and penalties + +| Name | Value | +| - | - | +| `BASEFEE_ADJUSTMENT_FACTOR` | `2**3` (= 8) | +| `REWARD_COEFFICIENT_BASE` | `2**20` ( = 1,048,576) | + ### Signature domain types The following types are defined, mapping into `DomainType` (little endian): @@ -159,7 +171,7 @@ class ExtendedShardBlockCore(Container): ```python class ShardState(Container): - history_acc: Vector[Hash, 64] + history_accumulator: Vector[Hash, HISTORY_ACCUMULATOR_VECTOR] earlier_committee_rewards: List[uint64, MAX_PERSISTENT_COMMITTEE_SIZE] later_committee_rewards: List[uint64, MAX_PERSISTENT_COMMITTEE_SIZE] earlier_committee_fees: List[Gwei, MAX_PERSISTENT_COMMITTEE_SIZE] @@ -337,12 +349,12 @@ def compute_crosslink_data_root(blocks: Sequence[ShardBlock]) -> Hash: def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardState: earlier_committee = get_period_committee( beacon_state, - PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD * 2, + Epoch(PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD * 2), shard, ) later_committee = get_period_committee( beacon_state, - PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD, + Epoch(PHASE_1_FORK_EPOCH - EPOCHS_PER_SHARD_PERIOD), shard, ) return ShardState( @@ -403,11 +415,14 @@ def add_fee(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, ### Shard state transition function ```python -def shard_state_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock) -> None: +def shard_state_transition(state: ShardState, + beacon_state: BeaconState, + block: ShardBlock, + validate_state_root: bool=False) -> None: assert 
block.core.slot > state.slot for slot in range(state.slot, block.core.slot): shard_slot_transition(state, beacon_state) - shard_block_transition(state, beacon_state, block) + shard_block_transition(state, beacon_state, block, validate_state_root=validate_state_root) ``` ```python @@ -420,7 +435,7 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: depth = 0 h = hash_tree_root(state) while state.slot % 2**depth == 0: - state.history_acc[depth] = h + state.history_accumulator[depth] = h depth += 1 # Period transitions @@ -450,7 +465,10 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: ``` ```python -def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: ShardBlock) -> None: +def shard_block_transition(state: ShardState, + beacon_state: BeaconState, + block: ShardBlock, + validate_state_root: bool) -> None: # Check slot number assert block.core.slot == state.slot @@ -526,7 +544,8 @@ def shard_block_transition(state: ShardState, beacon_state: BeaconState, block: ) # Check state root - assert hash_tree_root(state) == block.core.state_root + if validate_state_root: + assert block.core.state_root == hash_tree_root(state) ``` ### Beacon attestations From 3aba05e252298db960ace81c6113558c1f91d24c Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 1 Aug 2019 14:19:08 +0800 Subject: [PATCH 063/130] Add simple tests for `shard_state_transition` --- configs/constant_presets/minimal.yaml | 10 ++++ .../test/helpers/phase1/shard_block.py | 44 +++++++++++++-- .../shard_data_chain/test_shard_block.py | 54 ++++++++++++++++--- 3 files changed, 97 insertions(+), 11 deletions(-) diff --git a/configs/constant_presets/minimal.yaml b/configs/constant_presets/minimal.yaml index 34419a223..ab8aab3c4 100644 --- a/configs/constant_presets/minimal.yaml +++ b/configs/constant_presets/minimal.yaml @@ -129,3 +129,13 @@ DOMAIN_TRANSFER: 0x05000000 DOMAIN_CUSTODY_BIT_CHALLENGE: 0x06000000 DOMAIN_SHARD_PROPOSER: 0x80000000 DOMAIN_SHARD_ATTESTER: 0x81000000 + + +# Phase 1 +# --------------------------------------------------------------- +SHARD_SLOTS_PER_BEACON_SLOT: 2 +EPOCHS_PER_SHARD_PERIOD: 4 +# PHASE_1_FORK_EPOCH >= EPOCHS_PER_SHARD_PERIOD * 2 +PHASE_1_FORK_EPOCH: 8 +# PHASE_1_FORK_SLOT = PHASE_1_FORK_EPOCH * SHARD_SLOTS_PER_BEACON_SLOT * SLOTS_PER_EPOCH +PHASE_1_FORK_SLOT: 128 diff --git a/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py b/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py index 4e1981727..42e2765ea 100644 --- a/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py +++ b/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py @@ -7,6 +7,10 @@ from eth2spec.utils.ssz.ssz_impl import ( signing_root, ) +from .attestations import ( + sign_shard_attestation, +) + @only_with_bls() def sign_shard_block(spec, state, block, shard, proposer_index=None): @@ -26,22 +30,52 @@ def sign_shard_block(spec, state, block, shard, proposer_index=None): ) -def build_empty_shard_block(spec, state, slot, shard, parent_root, signed=False): +def build_empty_shard_block(spec, + shard_state, + beacon_state, + slot, + parent_root, + signed=False, + full_attestation=False): if slot is None: - slot = state.slot + slot = shard_state.slot + block = spec.ShardBlock( core=spec.ExtendedShardBlockCore( slot=slot, - beacon_chain_root=state.block_roots[state.slot % spec.SLOTS_PER_HISTORICAL_ROOT], + beacon_chain_root=beacon_state.block_roots[beacon_state.slot % spec.SLOTS_PER_HISTORICAL_ROOT], 
parent_root=parent_root, ), signatures=spec.ShardBlockSignatures( - attestation_signature=b'\x12' * 96, + attestation_signature=b'\x00' * 96, proposer_signature=b'\x25' * 96, ) ) + # attestation + if full_attestation: + attester_committee = spec.get_persistent_committee(beacon_state, shard_state.shard, block.core.slot) + block.core.attester_bitfield = list( + (True,) * len(attester_committee) + + (False,) * (spec.MAX_PERSISTENT_COMMITTEE_SIZE * 2 - len(attester_committee)) + ) + block.signatures.attestation_signature = sign_shard_attestation( + spec, + shard_state, + beacon_state, + block, + participants=attester_committee, + ) + else: + block.signatures.attestation_signature = sign_shard_attestation( + spec, + shard_state, + beacon_state, + block, + participants=(), + ) + if signed: - sign_shard_block(spec, state, block, shard) + sign_shard_block(spec, beacon_state, block, shard_state.shard) return block diff --git a/test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_shard_block.py b/test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_shard_block.py index 359350d39..2bb0232f0 100644 --- a/test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_shard_block.py +++ b/test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_shard_block.py @@ -11,16 +11,58 @@ from eth2spec.test.context import ( @with_all_phases_except(['phase0']) @always_bls @spec_state_test -def test_is_valid_shard_block(spec, state): +def test_process_empty_shard_block(spec, state): + beacon_state = state + + shard_slot = spec.PHASE_1_FORK_SLOT + beacon_state.slot = spec.Slot(spec.PHASE_1_FORK_EPOCH * spec.SLOTS_PER_EPOCH) + shard_state = spec.get_default_shard_state(beacon_state, shard=spec.Shard(0)) + shard_state.slot = shard_slot + block = build_empty_shard_block( spec, - state, - slot=spec.Slot(spec.PERSISTENT_COMMITTEE_PERIOD * 100), - shard=spec.Shard(1), + shard_state, + beacon_state, + slot=shard_slot + 1, parent_root=spec.Hash(), signed=True, + full_attestation=False, ) - # TODO: test `is_valid_shard_block` + yield 'pre', shard_state + yield 'beacon_state', beacon_state + yield 'block', block - yield 'blocks', (block,) + spec.shard_state_transition(shard_state, beacon_state, block) + + yield 'post', shard_state + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_process_full_attestation_shard_block(spec, state): + beacon_state = state + + shard_slot = spec.PHASE_1_FORK_SLOT + beacon_state.slot = spec.Slot(spec.PHASE_1_FORK_EPOCH * spec.SLOTS_PER_EPOCH) + shard_state = spec.get_default_shard_state(beacon_state, shard=spec.Shard(0)) + shard_state.slot = shard_slot + + block = build_empty_shard_block( + spec, + shard_state, + beacon_state, + slot=shard_slot + 1, + parent_root=spec.Hash(), + signed=True, + full_attestation=True, + ) + + yield 'pre', shard_state + yield 'beacon_state', beacon_state + yield 'block', block + + spec.shard_state_transition(shard_state, beacon_state, block) + + yield 'post', shard_state From db292502567c67b9cec08d8d19bbde6ae8ab6aad Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 1 Aug 2019 14:22:01 +0800 Subject: [PATCH 064/130] Add testing helpers --- .../test/helpers/phase1/attestations.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 test_libs/pyspec/eth2spec/test/helpers/phase1/attestations.py diff --git a/test_libs/pyspec/eth2spec/test/helpers/phase1/attestations.py b/test_libs/pyspec/eth2spec/test/helpers/phase1/attestations.py new file mode 100644 index 000000000..750ab5048 --- /dev/null 
+++ b/test_libs/pyspec/eth2spec/test/helpers/phase1/attestations.py @@ -0,0 +1,37 @@ +from eth2spec.test.helpers.keys import privkeys +from eth2spec.utils.bls import ( + bls_aggregate_signatures, + bls_sign, +) + + +def sign_shard_attestation(spec, shard_state, beacon_state, block, participants): + signatures = [] + message_hash = block.core.parent_root + block_epoch = spec.compute_epoch_of_shard_slot(block.core.slot) + for validator_index in participants: + privkey = privkeys[validator_index] + signatures.append( + get_attestation_signature( + spec, + shard_state, + beacon_state, + message_hash, + block_epoch, + privkey, + ) + ) + + return bls_aggregate_signatures(signatures) + + +def get_attestation_signature(spec, shard_state, beacon_state, message_hash, block_epoch, privkey): + return bls_sign( + message_hash=message_hash, + privkey=privkey, + domain=spec.get_domain( + state=beacon_state, + domain_type=spec.DOMAIN_SHARD_ATTESTER, + message_epoch=block_epoch, + ) + ) From 4163053ccad9f55aefd13f8bcc51ec086a12acf6 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 1 Aug 2019 16:45:01 +0800 Subject: [PATCH 065/130] Clean up type hintings, especially `reward` is denominated in uint, and `fee` is in `Gwei` --- specs/core/1_shard-data-chains.md | 35 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 0ef54570a..c4d8e2701 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -218,9 +218,7 @@ def get_shard_period_start_epoch(epoch: Epoch, lookback: int=0) -> Epoch: ### `get_period_committee` ```python -def get_period_committee(state: BeaconState, - epoch: Epoch, - shard: Shard) -> List[ValidatorIndex, MAX_PERSISTENT_COMMITTEE_SIZE]: +def get_period_committee(state: BeaconState, epoch: Epoch, shard: Shard) -> Sequence[ValidatorIndex]: """ Return committee for a period. Used to construct persistent committees. 
""" @@ -363,8 +361,8 @@ def get_default_shard_state(beacon_state: BeaconState, shard: Shard) -> ShardSta slot=PHASE_1_FORK_SLOT, earlier_committee_rewards=[REWARD_COEFFICIENT_BASE for _ in range(len(earlier_committee))], later_committee_rewards=[REWARD_COEFFICIENT_BASE for _ in range(len(later_committee))], - earlier_committee_fees=[0 for _ in range(len(earlier_committee))], - later_committee_fees=[0 for _ in range(len(later_committee))], + earlier_committee_fees=[Gwei(0) for _ in range(len(earlier_committee))], + later_committee_fees=[Gwei(0) for _ in range(len(later_committee))], ) ``` @@ -383,7 +381,7 @@ Note that these acceptance conditions depend on the canonical beacon chain; when ### Shard state transition function helpers ```python -def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: Gwei) -> None: +def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: int) -> None: epoch = compute_epoch_of_shard_slot(state.slot) earlier_committee = get_period_committee( beacon_state, @@ -400,7 +398,7 @@ def add_reward(state: ShardState, beacon_state: BeaconState, index: ValidatorInd ``` ```python -def add_fee(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: Gwei) -> None: +def add_fee(state: ShardState, beacon_state: BeaconState, index: ValidatorIndex, delta: int) -> None: epoch = compute_epoch_of_shard_slot(state.slot) earlier_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=2), state.shard) later_committee = get_period_committee(beacon_state, get_shard_period_start_epoch(epoch, lookback=1), state.shard) @@ -452,13 +450,17 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: state.shard, ) state.receipt_root = hash_tree_root(List[ShardReceiptDelta, PLACEHOLDER]([ - ShardReceiptDelta(index, state.earlier_committee_rewards[i], state.earlier_committee_fees[i]) - for i, index in enumerate(earlier_committee) + ShardReceiptDelta( + index=validator_index, + reward_coefficient=state.earlier_committee_rewards[i], + block_fee=state.earlier_committee_fees[i], + ) + for i, validator_index in enumerate(earlier_committee) ])) state.earlier_committee_rewards = state.later_committee_rewards state.earlier_committee_fees = state.later_committee_fees state.later_committee_rewards = [REWARD_COEFFICIENT_BASE for _ in range(len(later_committee))], - state.later_committee_fees = [0 for _ in range(len(later_committee))], + state.later_committee_fees = [Gwei(0) for _ in range(len(later_committee))], else: state.receipt_root = Hash() state.slot += ShardSlot(1) @@ -478,19 +480,18 @@ def shard_block_transition(state: ShardState, # Calculate base reward total_balance = get_total_active_balance(beacon_state) - base_reward = Gwei( + base_reward = ( REWARD_COEFFICIENT_BASE * BASE_REWARD_FACTOR // integer_squareroot(total_balance) // BASE_REWARDS_PER_EPOCH ) - # Check attestations attester_committee = get_persistent_committee(beacon_state, state.shard, block.core.slot) pubkeys = [] attestations = 0 - for i, index in enumerate(attester_committee): + for i, validator_index in enumerate(attester_committee): if block.core.attester_bitfield[i]: - pubkeys.append(beacon_state.validators[index].pubkey) - add_reward(state, beacon_state, index, base_reward) + pubkeys.append(beacon_state.validators[validator_index].pubkey) + add_reward(state, beacon_state, validator_index, base_reward) attestations += 1 for i in range(len(attester_committee), 
MAX_PERSISTENT_COMMITTEE_SIZE): @@ -506,7 +507,7 @@ def shard_block_transition(state: ShardState, # Check proposer proposer_index = get_shard_block_proposer_index(beacon_state, state.shard, block.core.slot) assert proposer_index is not None - add_reward(state, beacon_state, proposer_index, Gwei(attestations * base_reward // PROPOSER_REWARD_QUOTIENT)) + add_reward(state, beacon_state, proposer_index, attestations * base_reward // PROPOSER_REWARD_QUOTIENT) assert bls_verify( pubkey=beacon_state.validators[proposer_index].pubkey, message_hash=hash_tree_root(block.core), @@ -515,7 +516,7 @@ def shard_block_transition(state: ShardState, ) # Process and update block data fees - add_fee(state, beacon_state, proposer_index, Gwei(state.basefee * len(block.core.data) // SHARD_BLOCK_SIZE_LIMIT)) + add_fee(state, beacon_state, proposer_index, state.basefee * len(block.core.data) // SHARD_BLOCK_SIZE_LIMIT) QUOTIENT = SHARD_BLOCK_SIZE_LIMIT * BASEFEE_ADJUSTMENT_FACTOR if len(block.core.data) > SHARD_BLOCK_SIZE_TARGET: state.basefee += Gwei(max(1, state.basefee * (len(block.core.data) - SHARD_BLOCK_SIZE_TARGET) // QUOTIENT)) From cb1a0cbd5f49eed89a0aa058a60e5e134543fffe Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 07:57:34 -0400 Subject: [PATCH 066/130] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index f62dc8d5c..5b88fbbca 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -120,6 +120,7 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. """ + root = 1 for p in path: assert not is_basic_type(typ) # If we descend to a basic type, the path cannot continue further if p == '__len__': From ed3749264b9ce5cbc444911da99ed289739e1d30 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 08:07:57 -0400 Subject: [PATCH 067/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 5b88fbbca..114947326 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -202,7 +202,7 @@ def get_expanded_indices(indices: List[int]) -> List[int]: branches = set() for index in indices: branches = branches.union(set(get_branch_indices(index) + [index])) - return sorted(list([x for x in branches if x*2 not in branches or x*2+1 not in branches]))[::-1] + return sorted([x for x in branches if x*2 not in branches or x*2+1 not in branches])[::-1] ``` Generating a proof that covers paths `p1 ... pn` is simply a matter of taking the chunks in the SSZ hash tree with generalized indices `get_expanded_indices([p1 ... pn])`. 
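To make these mechanics concrete, here is a minimal, self-contained sketch of consuming such a proof: sibling chunks at generalized indices `k & -2` and `k | 1` are hashed together into their parent `k // 2` until the root (generalized index 1) is derived, mirroring the spec's `fill` helper (whose sibling-indexing fix appears in the next patch) but omitting its completeness/consistency checks. SHA-256 stands in for the spec's `hash`, and the 4-leaf tree and chunk values are purely illustrative.

```python
import hashlib
from typing import Dict


def hash_pair(left: bytes, right: bytes) -> bytes:
    # Stand-in for the spec's `hash` function.
    return hashlib.sha256(left + right).digest()


def fill(objects: Dict[int, bytes]) -> Dict[int, bytes]:
    # Whenever both siblings (k & -2 is the left child, k | 1 the right child)
    # are known and their parent k // 2 is not, derive the parent; repeat
    # until no new nodes can be derived.
    objects = dict(objects)
    keys = sorted(objects.keys())[::-1]
    pos = 0
    while pos < len(keys):
        k = keys[pos]
        if k in objects and k ^ 1 in objects and k // 2 not in objects:
            objects[k // 2] = hash_pair(objects[k & -2], objects[k | 1])
            keys.append(k // 2)
        pos += 1
    return objects


# Illustrative 4-leaf tree: generalized indices 4..7 are the leaf chunks, 1 is the root.
full = fill({i: bytes([i]) * 32 for i in range(4, 8)})

# A proof covering the chunk at generalized index 4 consists of chunks {4, 5, 3};
# filling from just those chunks reproduces the same root.
proof = {4: full[4], 5: full[5], 3: full[3]}
assert fill(proof)[1] == full[1]
```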
From 92fc0f2b81a2cc8a589fee54109533d843fdc182 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 08:08:12 -0400 Subject: [PATCH 068/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 114947326..81650bdbf 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -221,7 +221,7 @@ def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]: while pos < len(keys): k = keys[pos] if k in objects and k ^ 1 in objects and k // 2 not in objects: - objects[k // 2] = hash(objects[k & - 2] + objects[k | 1]) + objects[k // 2] = hash(objects[k & -2] + objects[k | 1]) keys.append(k // 2) pos += 1 # Completeness and consistency check From 446ad3c392439fb916cf54cd6911d5dc5df1aab6 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 08:08:18 -0400 Subject: [PATCH 069/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 81650bdbf..dae2a1704 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -267,7 +267,7 @@ def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Unio for p in path: if p == '__len__': return deserialize_basic(chunks[root * 2 + 1][:8], uint64) - if iissubclass(typ, (List, Bytes)): + if issubclass(typ, (List, Bytes)): assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64) pos, start, end = get_item_position(typ, p) root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos From e5350c10264139b163055773dc8e080201618de6 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Thu, 1 Aug 2019 12:44:41 +0200 Subject: [PATCH 070/130] Update libp2p networking spec --- specs/networking/libp2p-standardization.md | 158 ----- specs/networking/messaging.md | 45 -- specs/networking/node-identification.md | 31 - specs/networking/p2p-interface.md | 712 +++++++++++++++++++++ specs/networking/rpc-interface.md | 283 -------- 5 files changed, 712 insertions(+), 517 deletions(-) delete mode 100644 specs/networking/libp2p-standardization.md delete mode 100644 specs/networking/messaging.md delete mode 100644 specs/networking/node-identification.md create mode 100644 specs/networking/p2p-interface.md delete mode 100644 specs/networking/rpc-interface.md diff --git a/specs/networking/libp2p-standardization.md b/specs/networking/libp2p-standardization.md deleted file mode 100644 index d1ba07e65..000000000 --- a/specs/networking/libp2p-standardization.md +++ /dev/null @@ -1,158 +0,0 @@ -ETH 2.0 Networking Spec - Libp2p standard protocols -=== - -# Abstract - -Ethereum 2.0 clients plan to use the libp2p protocol networking stack for -mainnet release. This document aims to standardize the libp2p client protocols, -configuration and messaging formats. - -# Libp2p Components - -## Transport - -This section details the libp2p transport layer that underlies the -[protocols](#protocols) that are listed in this document. - -Libp2p allows composition of multiple transports. Eth2.0 clients should support -TCP/IP and optionally websockets. 
Websockets are useful for implementations -running in the browser and therefore native clients would ideally support these implementations -by supporting websockets. - -An ideal libp2p transport would therefore support both TCP/IP and websockets. - -*Note: There is active development in libp2p to facilitate the -[QUIC](https://github.com/libp2p/go-libp2p-quic-transport) transport, which may -be adopted in the future* - -### Encryption - -Libp2p currently offers [Secio](https://github.com/libp2p/specs/pull/106) which -can upgrade a transport which will then encrypt all future communication. Secio -generates a symmetric ephemeral key which peers use to encrypt their -communication. It can support a range of ciphers and currently supports key -derivation for elliptic curve-based public keys. - -Current defaults are: -- Key agreement: `ECDH-P256` (also supports `ECDH-P384`) -- Cipher: `AES-128` (also supports `AES-256`, `TwofishCTR`) -- Digests: `SHA256` (also supports `SHA512`) - -*Note: Secio is being deprecated in favour of [TLS -1.3](https://github.com/libp2p/specs/blob/master/tls/tls.md). It is our -intention to transition to use TLS 1.3 for encryption between nodes, rather -than Secio.* - - -## Protocols - -This section lists the necessary libp2p protocols required by Ethereum 2.0 -running a libp2p network stack. - -## Multistream-select - -#### Protocol id: `/multistream/1.0.0` - -Clients running libp2p should support the -[multistream-select](https://github.com/multiformats/multistream-select/) -protocol which allows clients to negotiate libp2p protocols establish streams -per protocol. - -## Multiplexing - -Libp2p allows clients to compose multiple multiplexing methods. Clients should -support [mplex](https://github.com/libp2p/specs/tree/master/mplex) and -optionally [yamux](https://github.com/hashicorp/yamux/blob/master/spec.md) -(these can be composed). - -**Mplex protocol id: `/mplex/6.7.0`** - -**Yamux protocol id: `/yamux/1.0.0`** - -## Gossipsub - -#### Protocol id: `/eth/serenity/gossipsub/1.0.0` - -*Note: Parameters listed here are subject to a large-scale network feasibility -study* - -The [Gossipsub](https://github.com/libp2p/specs/tree/master/pubsub/gossipsub) -protocol is used for block and attestation propagation across the -network. - -### Configuration Parameters - -Gossipsub has a number of internal configuration parameters which directly -effect the network performance. Clients can implement independently, however -we aim to standardize these across clients to optimize the gossip network for -propagation times and message duplication. Current network-related defaults are: - -``` -( - // The target number of peers in the overlay mesh network (D in the libp2p specs). - mesh_size: 6 - // The minimum number of peers in the mesh network before adding more (D_lo in the libp2p specs). - mesh_lo: 4 - // The maximum number of peers in the mesh network before removing some (D_high in the libp2p sepcs). - mesh_high: 12 - // The number of peers to gossip to during a heartbeat (D_lazy in the libp2p sepcs). - gossip_lazy: 6 // defaults to `mesh_size` - // Time to live for fanout peers (seconds). - fanout_ttl: 60 - // The number of heartbeats to gossip about. - gossip_history: 3 - // Time between each heartbeat (seconds). 
- heartbeat_interval: 1 -) -``` - -### Topics - -*The Go and Js implementations use string topics - This is likely to be -updated to topic hashes in later versions - https://github.com/libp2p/rust-libp2p/issues/473* - -For Eth2.0 clients, topics are sent as `SHA2-256` hashes of the topic string. - -There are two main topics used to propagate attestations and beacon blocks to -all nodes on the network. - -- The `beacon_block` topic - This topic is used solely for propagating new - beacon blocks to all nodes on the networks. -- The `beacon_attestation` topic - This topic is used to propagate - aggregated attestations to subscribing nodes (typically block proposers) to - be included into future blocks. Attestations are aggregated in their - respective subnets before publishing on this topic. - -Shards are grouped into their own subnets (defined by a shard topic). The -number of shard subnets is defined via `SHARD_SUBNET_COUNT` and the shard -`shard_number % SHARD_SUBNET_COUNT` is assigned to the topic: -`shard{shard_number % SHARD_SUBNET_COUNT}_attestation`. - -### Messages - -*Note: The message format here is Eth2.0-specific* - -Each Gossipsub -[Message](https://github.com/libp2p/go-libp2p-pubsub/blob/master/pb/rpc.proto#L17-L24) -has a maximum size of 512KB (estimated from expected largest uncompressed block -size). - -The `data` field of a Gossipsub `Message` is an SSZ-encoded object. For the `beacon_block` topic, -this is a `beacon_block`. For the `beacon_attestation` topic, this is -an `attestation`. - -## Eth-2 RPC - -#### Protocol Id: `/eth/serenity/beacon/rpc/1` - -The [RPC Interface](./rpc-interface.md) is specified in this repository. - -## Discovery - -Discovery Version 5 -([discv5](https://github.com/ethereum/devp2p/blob/master/discv5/discv5.md)) -will be used for discovery. This protocol uses a UDP transport and specifies -its own encryption, ip-discovery and topic advertisement. Therefore, it has no -need to establish streams through `multistream-select`, rather, act -as a standalone implementation that feeds discovered peers/topics (ENR-records) as -`multiaddrs` into the libp2p service. diff --git a/specs/networking/messaging.md b/specs/networking/messaging.md deleted file mode 100644 index d7cb5bb5b..000000000 --- a/specs/networking/messaging.md +++ /dev/null @@ -1,45 +0,0 @@ -# Eth 2.0 Networking Spec - Messaging - -## Abstract - -This specification describes how individual Ethereum 2.0 messages are represented on the wire. - -The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL”, NOT", “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). - -## Motivation - -This specification seeks to define a messaging protocol that is flexible enough to be changed easily as the Eth 2.0 specification evolves. - -Note that while `libp2p` is the chosen networking stack for Ethereum 2.0, as of this writing some clients do not have workable `libp2p` implementations. To allow those clients to communicate, we define a message envelope that includes the body's compression, encoding, and body length. Once `libp2p` is available across all implementations, this message envelope will be removed because `libp2p` will negotiate the values defined in the envelope upfront. - -## Specification - -### Message structure - -An Eth 2.0 message consists of an envelope that defines the message's compression, encoding, and length followed by the body itself. 
- -Visually, a message looks like this: - -``` -+--------------------------+ -| compression nibble | -+--------------------------+ -| encoding nibble | -+--------------------------+ -| body length (uint64) | -+--------------------------+ -| | -| body | -| | -+--------------------------+ -``` - -Clients MUST ignore messages with malformed bodies. The compression/encoding nibbles MUST be one of the following values: - -### Compression nibble values - -- `0x0`: no compression - -### Encoding nibble values - -- `0x1`: SSZ diff --git a/specs/networking/node-identification.md b/specs/networking/node-identification.md deleted file mode 100644 index 32ec4dfad..000000000 --- a/specs/networking/node-identification.md +++ /dev/null @@ -1,31 +0,0 @@ -# Eth 2.0 Networking Spec - Node Identification - -## Abstract - -This specification describes how Ethereum 2.0 nodes identify and address each other on the network. - -The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL", NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). - -## Specification - -Clients use Ethereum Node Records (as described in [EIP-778](http://eips.ethereum.org/EIPS/eip-778)) to discover one another. Each ENR includes, among other things, the following keys: - -- The node's IP. -- The node's TCP port. -- The node's public key. - -For clients to be addressable, their ENR responses MUST contain all of the above keys. Client MUST verify the signature of any received ENRs, and disconnect from peers whose ENR signatures are invalid. Each node's public key MUST be unique. - -The keys above are enough to construct a [multiaddr](https://github.com/multiformats/multiaddr) for use with the rest of the `libp2p` stack. - -It is RECOMMENDED that clients set their TCP port to the default of `9000`. - -### Peer ID generation - -The `libp2p` networking stack identifies peers via a "peer ID." Simply put, a node's Peer ID is the SHA2-256 `multihash` of the node's public key struct (serialized in protobuf, refer to the [Peer ID spec](https://github.com/libp2p/specs/pull/100)). `go-libp2p-crypto` contains the canonical implementation of how to hash `secp256k1` keys for use as a peer ID. - -## See also - -- [multiaddr](https://github.com/multiformats/multiaddr) -- [multihash](https://multiformats.io/multihash/) -- [go-libp2p-crypto](https://github.com/libp2p/go-libp2p-crypto) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md new file mode 100644 index 000000000..72f5c0fd6 --- /dev/null +++ b/specs/networking/p2p-interface.md @@ -0,0 +1,712 @@ +# Overview + +This document contains the network specification for Ethereum 2.0 clients. + +It consists of four main sections: + +1. A specification of the network fundamentals detailing the two network configurations: interoperability test network, and mainnet launch. +2. A specification of the three network interaction _domains_ of ETH2.0: (a) the gossip domain, (b) the discovery domain, \(c\) the Req/Resp domain. +3. The rationale and further explanation for the design choices made in the previous two sections. +4. An analysis of the maturity/state of the libp2p features required by this spec across the languages in which ETH 2.0 clients are being developed. + +## Table of Contents +[TOC] + +# Network Fundamentals + +This section outlines the specification for the networking stack in Ethereum 2.0 clients. 
+ +Sections that have differing parameters for mainnet launch and interoperability testing are split into subsections. Sections that are not split have the same parameters for interoperability testing as mainnet launch. + +## Transport + +Even though libp2p is a multi-transport stack (designed to listen on multiple simultaneous transports and endpoints transparently), we hereby define a profile for basic interoperability. + +#### Interop + +All implementations MUST support the TCP libp2p transport, and it MUST be enabled for both dialing and listening (i.e. outbound and inbound connections). + +The libp2p TCP transport supports listening on IPv4 and IPv6 addresses (and on multiple simultaneously). Clients SHOULD allow the operator to configure the listen IP addresses and ports, including the addressing schemes (IPv4, IPv6). + +To facilitate connectivity, and avert possible IPv6 routability/support issues, clients participating in the interoperability testnet MUST expose at least ONE IPv4 endpoint. + +All listening endpoints must be publicly dialable, and thus not rely on libp2p circuit relay, AutoNAT or AutoRelay facilities. + +Nodes operating behind a NAT, or otherwise undialable by default (e.g. container runtime, firewall, etc.), MUST have their infrastructure configured to enable inbound traffic on the announced public listening endpoint. + +#### Mainnet + +All requirements from the interoperability testnet apply, except for the IPv4 addressing scheme requirement. + +At this stage, clients are licensed to drop IPv4 support if they wish to do so, cognizant of the potential disadvantages in terms of Internet-wide routability/support. Clients MAY choose to listen only on IPv6, but MUST retain capability to dial both IPv4 and IPv6 addresses. + +Usage of circuit relay, AutoNAT or AutoRelay will be specifically re-examined closer to the time. + +## Encryption and identification + +#### Interop + +[SecIO](https://github.com/libp2p/specs/tree/master/secio) with `secp256k1` identities will be used for initial interoperability testing. + +The following SecIO parameters MUST be supported by all stacks: + +- Key agreement: ECDH-P256. +- Cipher: AES-128. +- Digest: SHA256. + +#### Mainnet + +[Noise Framework](http://www.noiseprotocol.org/) handshakes will be used for mainnet. libp2p Noise support [is in the process of being standardised](https://github.com/libp2p/specs/issues/195) in the libp2p project. + +Noise support will presumably include IX, IK and XX handshake patterns, and may rely on Curve25519 keys, ChaCha20 and Poly1305 ciphers, and SHA256 as a hash function. These aspects are being actively debated in the referenced issue [ETH 2.0 implementers are welcome to comment and contribute to the discussion.] + +## Protocol Negotiation + +#### Interop + +Connection-level and stream-level (see the rationale section below for explanations) protocol negotiation MUST be conducted using [multistream-select v1.0](https://github.com/multiformats/multistream-select/). Its protocol ID is: `/multistream/1.0.0`. + +#### Mainnet + +Clients MUST support [multistream-select 1.0](https://github.com/multiformats/multistream-select/) and MAY support [multiselect 2.0](https://github.com/libp2p/specs/pull/95). Depending on the number of clients that have implementations for multiselect 2.0 by mainnet, [multistream-select 1.0](https://github.com/multiformats/multistream-select/) may be phased out. 
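To illustrate the accept/reject semantics of multistream-select 1.0 (ignoring the wire format, which adds varint length prefixes and newline terminators), the sketch below shows a listener echoing a proposed protocol ID to accept it, or answering `na` so the dialer can propose its next preference. The supported-protocol set is hypothetical.

```python
from typing import List

# Hypothetical set of protocol IDs this peer supports.
SUPPORTED_PROTOCOLS = {
    "/mplex/6.7.0",
    "/eth2/beacon_chain/req/hello/1/ssz",
}


def respond(proposed: str) -> str:
    # Listener side: echo the proposed protocol ID to accept it,
    # or answer "na" to reject it.
    return proposed if proposed in SUPPORTED_PROTOCOLS else "na"


def negotiate(dialer_preferences: List[str]) -> str:
    # Dialer side: propose protocols in preference order until one is echoed back.
    for protocol_id in dialer_preferences:
        if respond(protocol_id) == protocol_id:
            return protocol_id
    raise ValueError("no mutually supported protocol")


assert negotiate(["/yamux/1.0.0", "/mplex/6.7.0"]) == "/mplex/6.7.0"
```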
+ +## Multiplexing + +During connection bootstrapping, libp2p dynamically negotiates a mutually supported multiplexing method to conduct parallel conversations. This applies to transports that are natively incapable of multiplexing (e.g. TCP, WebSockets, WebRTC), and is omitted for capable transports (e.g. QUIC). + +Two multiplexers are commonplace in libp2p implementations: [mplex](https://github.com/libp2p/specs/tree/master/mplex) and [yamux](https://github.com/hashicorp/yamux/blob/master/spec.md). Their protocol IDs are, respectively: `/mplex/6.7.0` and `/yamux/1.0.0`. + +Clients MUST support [mplex](https://github.com/libp2p/specs/tree/master/mplex) and MAY support [yamux](https://github.com/hashicorp/yamux/blob/master/spec.md). If both are supported by the client, yamux must take precedence during negotiation. See the Rationale section of this document for tradeoffs. + +# ETH2 network interaction domains + +## Constants + +This section outlines constants that are used in this spec. + +- `RQRP_MAX_SIZE`: The max size of uncompressed req/resp messages that clients will allow. + Value: TBD +- `GOSSIP_MAX_SIZE`: The max size of uncompressed gossip messages. + Value: 1MB (estimated from expected largest uncompressed block size). +- `SHARD_SUBNET_COUNT`: The number of shard subnets used in the gossipsub protocol. + Value: TBD + +## The gossip domain: gossipsub + +Clients MUST support the [gossipsub](https://github.com/libp2p/specs/tree/master/pubsub/gossipsub) libp2p protocol. + +**Protocol ID:** `/meshsub/1.0.0` + +**Gossipsub Parameters** + +*Note: Parameters listed here are subject to a large-scale network feasibility study.* + +The following gossipsub parameters will be used: + +- `D` (topic stable mesh target count): 6 +- `D_low` (topic stable mesh low watermark): 4 +- `D_high` (topic stable mesh high watermark): 12 +- `D_lazy` (gossip target): 6 +- `fanout_ttl` (ttl for fanout maps for topics we are not subscribed to but have published to, seconds): 60 +- `gossip_advertise` (number of windows to gossip about): 3 +- `gossip_history` (number of heartbeat intervals to retain message IDs): 5 +- `heartbeat_interval` (frequency of heartbeat, seconds): 1 + +### Topics + +Topics are plain UTF-8 strings, and are encoded on the wire as determined by protobuf (gossipsub messages are enveloped in protobuf messages). + +Topic strings have the form: `/eth2/TopicName/TopicEncoding`. This defines both the type of data being sent on the topic and how the data field of the message is encoded. (Further details can be found in [Messages](#Messages)). + +There are two main topics used to propagate attestations and beacon blocks to all nodes on the network. Their `TopicName`s are: + +- `beacon_block` - This topic is used solely for propagating new beacon blocks to all nodes on the network. Blocks are sent in their entirety. Clients who receive a block on this topic MUST validate the block proposer signature before forwarding it across the network. +- `beacon_attestation` - This topic is used to propagate aggregated attestations (in their entirety) to subscribing nodes (typically block proposers) to be included in future blocks. Similarly to beacon blocks, clients will be expected to perform some sort of validation before forwarding, but the precise mechanism is still TBD. + +Additional topics are used to propagate lower-frequency validator messages. Their `TopicName`s are: + +- `voluntary_exit` - This topic is used solely for propagating voluntary validator exits to proposers on the network. 
Voluntary exits are sent in their entirety. Clients who receive a voluntary exit on this topic MUST validate the conditions within `process_voluntary_exit` before forwarding it across the network. +- `proposer_slashing` - This topic is used solely for propagating proposer slashings to proposers on the network. Proposer slashings are sent in their entirety. Clients who receive a proposer slashing on this topic MUST validate the conditions within `process_proposer_slashing` before forwarding it across the network. +- `attester_slashing` - This topic is used solely for propagating attester slashings to proposers on the network. Attester slashings are sent in their entirety. Clients who receive an attester slashing on this topic MUST validate the conditions within `process_attester_slashing` before forwarding it across the network. + +#### Interop + +Unaggregated attestations from all shards are sent to the `beacon_attestation` topic. + +#### Mainnet + +Shards are grouped into their own subnets (defined by a shard topic). The number of shard subnets is defined via `SHARD_SUBNET_COUNT` and the shard `shard_number % SHARD_SUBNET_COUNT` is assigned to the topic: `shard{shard_number % SHARD_SUBNET_COUNT}_beacon_attestation`. Unaggregated attestations are sent to the subnet topic. Aggregated attestations are sent to the `beacon_attestation` topic. + +### Messages + +Each gossipsub [message](https://github.com/libp2p/go-libp2p-pubsub/blob/master/pb/rpc.proto#L17-L24) has a maximum size of `GOSSIP_MAX_SIZE`. + +Clients MUST reject (fail validation) messages that are over this size limit. Likewise, clients MUST NOT emit or propagate messages larger than this limit. + +The payload is carried in the `data` field of a gossipsub message, and varies depending on the topic: + + +| Topic | Message Type | +|------------------------------|-------------------| +| beacon_block | BeaconBlock | +| beacon_attestation | Attestation | +| shard{N}\_beacon_attestation | Attestation | +| voluntary_exit | VoluntaryExit | +| proposer_slashing | ProposerSlashing | +| attester_slashing | AttesterSlashing | + +Clients MUST reject (fail validation) messages containing an incorrect type, or invalid payload. + +When processing incoming gossip, clients MAY descore or disconnect peers who fail to observe these constraints. + +### Encodings + +Topics are post-fixed with an encoding. Encodings define how the payload of a gossipsub message is encoded. + +#### Interop + +- `ssz` - All objects are SSZ-encoded. Example: The beacon block topic string is: `/beacon_block/ssz` and the data field of a gossipsub message is an ssz-encoded `BeaconBlock`. + +#### Mainnet + +- `ssz_snappy` - All objects are ssz-encoded and then compressed with snappy. Example: The beacon attestation topic string is: `/beacon_attestation/ssz_snappy` and the data field of a gossipsub message is an `Attestation` that has been ssz-encoded then compressed with snappy. + +Implementations MUST use a single encoding. Changing an encoding will require coordination between participating implementations. + +## The discovery domain: discv5 + +Discovery Version 5 ([discv5](https://github.com/ethereum/devp2p/blob/master/discv5/discv5.md)) is used for peer discovery, both in the interoperability testnet and mainnet. + +`discv5` is a standalone protocol, running on UDP on a dedicated port, meant for peer discovery only. `discv5` supports self-certified, flexible peer records (ENRs) and topic-based advertisement, both of which are (or will be) requirements in this context. 
+ +### Integration into libp2p stacks + +`discv5` SHOULD be integrated into the client’s libp2p stack by implementing an adaptor to make it conform to the [service discovery](https://github.com/libp2p/go-libp2p-core/blob/master/discovery/discovery.go) and [peer routing](https://github.com/libp2p/go-libp2p-core/blob/master/routing/routing.go#L36-L44) abstractions and interfaces (go-libp2p links provided). + +Inputs to operations include peer IDs (when locating a specific peer), or capabilities (when searching for peers with a specific capability), and the outputs will be multiaddrs converted from the ENR records returned by the discv5 backend. + +This integration enables the libp2p stack to subsequently form connections and streams with discovered peers. + +### ENR structure + +The Ethereum Node Record (ENR) for an Ethereum 2.0 client MUST contain the following entries (exclusive of the sequence number and signature, which MUST be present in an ENR): + +- The compressed secp256k1 public key, 33 bytes (`secp256k1` field). +- An IPv4 address (`ip` field) and/or IPv6 address (`ip6` field). +- A TCP port (`tcp` field) representing the local libp2p listening port. +- A UDP port (`udp` field) representing the local discv5 listening port. + +Specifications of these parameters can be found in the [ENR Specification](http://eips.ethereum.org/EIPS/eip-778). + +#### Interop + +In the interoperability testnet, all peers will support all capabilities defined in this document (gossip, full Req/Resp suite, discovery protocol); therefore, the ENR record does not need to carry ETH2 capability information, as it would be superfluous. + +Nonetheless, ENRs MUST carry a generic `eth2` key with nil value, denoting that the peer is indeed an ETH2 peer, in order to eschew connecting to ETH1 peers. + +#### Mainnet + +On mainnet, ENRs MUST include a structure enumerating the capabilities offered by the peer in an efficient manner. The concrete solution is currently undefined. Proposals include using namespaced bloom filters mapping capabilities to specific protocol IDs supported under that capability. + +### Topic advertisement + +#### Interop + +This feature will not be used in the interoperability testnet. + +#### Mainnet + +In mainnet, we plan to use discv5’s topic advertisement feature as a rendezvous facility for peers on shards (thus subscribing to the relevant gossipsub topics). + +## The Req/Resp domain + +### Protocol identification + +Each message type is segregated into its own libp2p protocol ID, which is a case-sensitive UTF-8 string of the form: + +``` +/ProtocolPrefix/MessageName/SchemaVersion/Encoding +``` + +With: + +- `ProtocolPrefix` - messages are grouped into families identified by a shared libp2p protocol name prefix. In this case, we use `/eth2/beacon_chain/req`. +- `MessageName` - each request is identified by a name consisting of English alphabet, digits and underscores (`_`). +- `SchemaVersion` - an ordinal version number (e.g. 1, 2, 3…). Each schema is versioned to facilitate backward and forward-compatibility when possible. +- `Encoding` - while the schema defines the data types in more abstract terms, the encoding strategy describes a specific representation of bytes that will be transmitted over the wire. See the [Encodings](#Encoding-strategies) section for further details. + +This protocol segregation allows libp2p `multistream-select 1.0` / `multiselect 2.0` to handle the request type, version and encoding negotiation before establishing the underlying streams. 
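As a brief sketch of how an implementation might assemble and decompose these protocol IDs (the helper names below are illustrative, not part of the spec):

```python
from typing import NamedTuple

PROTOCOL_PREFIX = "/eth2/beacon_chain/req"


class ReqRespProtocol(NamedTuple):
    prefix: str
    message_name: str
    schema_version: int
    encoding: str


def make_protocol_id(message_name: str, schema_version: int, encoding: str) -> str:
    # /ProtocolPrefix/MessageName/SchemaVersion/Encoding
    return f"{PROTOCOL_PREFIX}/{message_name}/{schema_version}/{encoding}"


def parse_protocol_id(protocol_id: str) -> ReqRespProtocol:
    # Split a negotiated protocol ID back into its four components.
    *prefix_parts, name, version, encoding = protocol_id.strip("/").split("/")
    return ReqRespProtocol("/" + "/".join(prefix_parts), name, int(version), encoding)


pid = make_protocol_id("hello", 1, "ssz")
assert pid == "/eth2/beacon_chain/req/hello/1/ssz"
assert parse_protocol_id(pid) == ReqRespProtocol(PROTOCOL_PREFIX, "hello", 1, "ssz")
```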
+ +### Req/Resp interaction + +We use ONE stream PER request/response interaction. Streams are closed when the interaction finishes, whether in success or in error. + +Request/response messages MUST adhere to the encoding specified in the protocol name, and follow this structure (relaxed BNF grammar): + +``` +request ::= <encoding-dependent-header> | <encoded-payload> +response ::= <result> | <encoding-dependent-header> | <encoded-payload> +result ::= “0” | “1” | “2” | [“128” ... ”255”] +``` + +The encoding-dependent header may carry metadata or assertions such as the encoded payload length, for integrity and attack proofing purposes. It is not strictly necessary to length-prefix payloads, because req/resp streams are single-use, and stream closures implicitly delimit the boundaries, but certain encodings like SSZ do, for added security. + +`encoded-payload` has a maximum byte size of `RQRP_MAX_SIZE`. + +Clients MUST ensure the payload size is less than or equal to `RQRP_MAX_SIZE`; if not, they SHOULD reset the stream immediately. Clients tracking peer reputation MAY decrement the score of the misbehaving peer under this circumstance. + +#### Requesting side + +Once a new stream with the protocol ID for the request type has been negotiated, the full request message should be sent immediately. It should be encoded according to the encoding strategy. + +The requester MUST close the write side of the stream once it finishes writing the request message - at this point, the stream will be half-closed. + +The requester MUST wait a maximum of **5 seconds** for the first response byte to arrive (time to first byte – or TTFB – timeout). On that happening, the requester will allow a further **10 seconds** to receive the full response. + +If any of these timeouts fire, the requester SHOULD reset the stream and deem the req/resp operation to have failed. + +#### Responding side + +Once a new stream with the protocol ID for the request type has been negotiated, the responder must process the incoming request message according to the encoding strategy, until EOF (denoting stream half-closure by the requester). + +The responder MUST: + +1. Use the encoding strategy to read the optional header. +2. If there are any length assertions for length `N`, it should read exactly `N` bytes from the stream, at which point an EOF should arise (no more bytes). Should this not be the case, it should be treated as a failure. +3. Deserialize the expected type, and process the request. +4. Write the response (result, optional header, payload). +5. Close their write side of the stream. At this point, the stream will be fully closed. + +If steps (1), (2) or (3) fail due to invalid, malformed or inconsistent data, the responder MUST respond in error. Clients tracking peer reputation MAY record such failures, as well as unexpected events, e.g. early stream resets. + +The entire request should be read in no more than **5 seconds**. Upon a timeout, the responder SHOULD reset the stream. + +The responder SHOULD send a response promptly, starting with a **single-byte** response code which determines the contents of the response (`result` particle in the BNF grammar above). + +It can have one of the following values: + +- 0: **Success** -- a normal response follows, with contents matching the expected message schema and encoding specified in the request. +- 1: **InvalidRequest** -- the contents of the request are semantically invalid, or the payload is malformed, or could not be understood. The response payload adheres to the ErrorMessage schema (described below). 
+- 2: **ServerError** -- the responder encountered an error while processing the request. The response payload adheres to the ErrorMessage schema (described below). + +Clients MAY use response codes above `128` to indicate alternative, erroneous request-specific responses. + +The range `[3, 127]` is RESERVED for future usage, and should be treated as an error if not recognised expressly. + +The `ErrorMessage` schema is: + +``` +( + error_message: String +) +``` + +*Note that the String type is encoded as UTF-8 bytes when SSZ-encoded.* + +A response therefore has the form: +``` + +--------+--------+--------+--------+--------+--------+ + | result | header (opt) | encoded_response | + +--------+--------+--------+--------+--------+--------+ +``` +Here `result` represents the 1-byte response code. + +### Encoding strategies + +The token of the negotiated protocol ID specifies the type of encoding to be used for the req/resp interaction. Two values are possible at this time: + +- `ssz`: the contents are [SSZ](https://github.com/ethereum/eth2.0-specs/blob/192442be51a8a6907d6401dffbf5c73cb220b760/specs/networking/libp2p-standardization.md#ssz-encoding) encoded. This encoding type MUST be supported by all clients. +- `ssz_snappy`: the contents are SSZ encoded, and subsequently compressed with [Snappy](https://github.com/google/snappy). MAY be supported in the interoperability testnet, and MUST be supported in mainnet. + +#### SSZ encoding strategy (with or without Snappy) + +The [SimpleSerialize (SSZ) specification](https://github.com/ethereum/eth2.0-specs/blob/192442be51a8a6907d6401dffbf5c73cb220b760/specs/simple-serialize.md) outlines how objects are SSZ-encoded. If the Snappy variant is selected, we feed the serialised form to the Snappy compressor on encoding. The inverse happens on decoding. + +**Encoding-dependent header:** Req/Resp protocols using the `ssz` or `ssz_snappy` encoding strategies MUST prefix all encoded and compressed (if applicable) payloads with an unsigned [protobuf varint](https://developers.google.com/protocol-buffers/docs/encoding#varints) representing the length of the payload. + +Note that parameters defined as `[]VariableName` are SSZ-encoded containerless vectors. + +### Messages + +#### Hello + +**Protocol ID:** ``/eth2/beacon_chain/req/hello/1/`` + +**Content**: +``` +( + fork_version: bytes4 + finalized_root: bytes32 + finalized_epoch: uint64 + head_root: bytes32 + head_slot: uint64 +) +``` +The fields are: + +- `fork_version`: The beacon_state `Fork` version. +- `finalized_root`: The latest finalized root the node knows about. +- `finalized_epoch`: The latest finalized epoch the node knows about. +- `head_root`: The block hash tree root corresponding to the head of the chain as seen by the sending node. +- `head_slot`: The slot corresponding to the `head_root`. + +Clients exchange hello messages upon connection, forming a two-phase handshake. The first message the initiating client sends MUST be the hello message. In response, the receiving client MUST respond with its own hello message. + +Clients SHOULD immediately disconnect from one another following the handshake above under the following conditions: + +1. If `fork_version` doesn’t match the local fork version, since the client’s chain is on another fork. `fork_version` can also be used to segregate testnets. +2. If the (`finalized_root`, `finalized_epoch`) shared by the peer is not in the client's chain at the expected epoch. 
For example, if Peer 1 sends (root, epoch) of (A, 5) and Peer 2 sends (B, 3) but Peer 1 has root C at epoch 3, then Peer 1 would disconnect because it knows that their chains are irreparably disjoint. + +Once the handshake completes, the client with the lower `finalized_epoch` or `head_slot` (if the clients have equal `finalized_epoch`s) SHOULD request beacon blocks from its counterparty via the `BeaconBlocks` request. + +#### Goodbye + +**Protocol ID:** ``/eth2/beacon_chain/req/goodbye/1/`` + +**Content:** +``` +( + reason: uint64 +) +``` +Clients MAY send goodbye messages upon disconnection. The reason field MAY be one of the following values: + +- 1: Client shut down. +- 2: Irrelevant network. +- 3: Fault/error. + +Clients MAY use reason codes above `128` to indicate alternative, erroneous request-specific responses. + +The range `[4, 127]` is RESERVED for future usage. + +#### BeaconBlocks + +**Protocol ID:** `/eth2/beacon_chain/req/beacon_blocks/1/` + +Request Content: +``` +( + head_block_root: HashTreeRoot + start_slot: uint64 + count: uint64 + step: uint64 +) +``` + +Response Content: +``` +( + blocks: []BeaconBlock +) +``` + +Requests `count` beacon blocks from the peer starting from `start_slot` on the chain defined by `head_block_root`. The response MUST contain no more than `count` blocks. `step` defines the slot increment between blocks. For example, requesting blocks starting at `start_slot` 2 with a `step` value of 2 would return the blocks at [2, 4, 6, …]. In cases where a slot is empty for a given slot number, no block is returned. For example, if slot 4 were empty in the previous example, the returned array would contain [2, 6, …]. A `step` value of 1 returns all blocks in the range `[start_slot, start_slot + count)`. + +`BeaconBlocks` is primarily used to sync historical blocks. + +Clients MUST support requesting blocks since the start of the weak subjectivity period and up to the given `head_block_root`. + +Clients MUST support `head_block_root` values since the latest finalized epoch. + +#### RecentBeaconBlocks + +**Protocol ID:** `/eth2/beacon_chain/req/recent_beacon_blocks/1/` + +Request Content: + +``` +( + block_roots: []HashTreeRoot +) +``` + +Response Content: + +``` +( + blocks: []BeaconBlock +) +``` + +Requests blocks by their block roots. The response is a list of `BeaconBlock` with the same length as the request. Blocks are returned in order of the request and any missing/unknown blocks are left empty (SSZ null `BeaconBlock`). + +`RecentBeaconBlocks` is primarily used to recover recent blocks, for example when receiving a block or attestation whose parent is unknown. + +Clients MUST support requesting blocks since the latest finalized epoch. + +# Design Decision Rationale + +## Transport + +### Why are we defining specific transports? + +libp2p peers can listen on multiple transports concurrently, and these can change over time. multiaddrs not only encode the address, but also the transport to be used to dial. + +Due to this dynamic nature, agreeing on specific transports like TCP, QUIC or WebSockets on paper becomes irrelevant. + +However, it is useful to define a minimum baseline for interoperability purposes. + +### Can clients support other transports/handshakes than the ones mandated by the spec? + +Clients may support other transports such as libp2p QUIC, WebSockets, and WebRTC transports, if available in the language of choice. 
While interoperability shall not be harmed by lack of such support, the advantages are desirable: + +- better latency, performance and other QoS characteristics (QUIC). +- paving the way for interfacing with future light clients (WebSockets, WebRTC). + +The libp2p QUIC transport inherently relies on TLS 1.3 per requirement in section 7 of the [QUIC protocol specification](https://tools.ietf.org/html/draft-ietf-quic-transport-22#section-7), and the accompanying [QUIC-TLS document](https://tools.ietf.org/html/draft-ietf-quic-tls-22). + +The usage of one handshake procedure or the other shall be transparent to the ETH 2.0 application layer, once the libp2p Host/Node object has been configured appropriately. + +### What are the advantages of using TCP/QUIC/WebSockets? + +TCP is a reliable, ordered, full-duplex, congestion-controlled network protocol that powers much of the Internet as we know it today. HTTP/1.1 and HTTP/2 run atop TCP. + +QUIC is a new protocol that’s in the final stages of specification by the IETF QUIC WG. It emerged from Google’s SPDY experiment. The QUIC transport is undoubtedly promising. It’s UDP-based yet reliable, ordered, reduces latency vs. TCP, is multiplexed, natively secure (TLS 1.3), offers stream-level and connection-level congestion control (thus removing head-of-line blocking), 0-RTT connection establishment, and endpoint migration, amongst other features. UDP also has better NAT traversal properties than TCP -- something we desperately pursue in peer-to-peer networks. + +QUIC is being adopted as the underlying protocol for HTTP/3. This has the potential to award us resistance to deep-packet-inspection-based censorship for free. Provided that we use the same port numbers and encryption mechanisms as HTTP/3, our traffic may be indistinguishable from standard web traffic, and we may only become subject to standard IP-based firewall filtering -- something we can counteract via other mechanisms. + +WebSockets and/or WebRTC transports are necessary for interaction with browsers, and will become increasingly important as we incorporate browser-based light clients into the ETH2 network. + +### Why do we not just support a single transport? + +Networks evolve. Hardcoding design decisions leads to ossification, preventing the evolution of networks alongside the state of the art. Introducing changes to an ossified protocol is very costly, and sometimes, downright impracticable without causing undesirable breakage. + +Modelling for upgradeability and dynamic transport selection from the get-go lays the foundation for a future-proof stack. + +Clients can adopt new transports without breaking old ones; and the multi-transport ability enables constrained and sandboxed environments (e.g. browsers, embedded devices) to interact with the network as first-class citizens via suitable/native transports (e.g. WSS), without the need for proxying or trust delegation to servers. + +### Why are we not using QUIC for mainnet from the start? + +The QUIC standard is still not finalised (at working draft 22 at the time of writing), and not all mainstream runtimes/languages have mature, standard, and/or fully-interoperable [QUIC support](https://github.com/quicwg/base-drafts/wiki/Implementations). One remarkable example is node.js, where the QUIC implementation is [in early development](https://github.com/nodejs/quic). + +## Multiplexing + +### Why are we using mplex/yamux? 
+ +[Yamux](https://github.com/hashicorp/yamux/blob/master/spec.md) is a multiplexer invented by Hashicorp that supports stream-level congestion control. Implementations exist in a limited set of languages, and it’s not a trivial piece to develop. + +Conscious of that, the libp2p community conceptualised [mplex](https://github.com/libp2p/specs/blob/master/mplex/README.md) as a simple, minimal multiplexer for usage with libp2p. It does not support stream-level congestion control, and is subject to head-of-line blocking. + +Overlay multiplexers are not necessary with QUIC, as the protocol provides native multiplexing, but they need to be layered atop TCP, WebSockets, and other transports that lack such support. + +## Protocol Negotiation + +### When is multiselect 2.0 due and why are we using it for mainnet? + +multiselect 2.0 is currently being conceptualised. Debate started [on this issue](https://github.com/libp2p/specs/pull/95), but it got overloaded – as it tends to happen with large conceptual OSS discussions that touch the heart and core of a system. + +In the following weeks (August 2019), there will be a renewed initiative to first define the requirements, constraints, assumptions and features, in order to lock in basic consensus upfront, to subsequently build on that consensus by submitting a specification for implementation. + +We plan to use multiselect 2.0 for mainnet because it will: + +1. Reduce round trips during connection bootstrapping and stream protocol negotiation. +2. Enable efficient one-stream-per-request interaction patterns. +3. Leverage *push data* mechanisms of underlying protocols to expedite negotiation. +4. Provide the building blocks for enhanced censorship resistance. + +### What is the difference between connection-level and stream-level protocol negotiation? + +All libp2p connections must be authenticated, encrypted, and multiplexed. Connections using network transports unsupportive of native authentication/encryption and multiplexing (e.g. TCP) need to undergo protocol negotiation to agree on a mutually supported: + +1. authentication/encryption mechanism (such as SecIO, TLS 1.3, Noise). +2. overlay multiplexer (such as mplex, Yamux, spdystream). + +In this specification, we refer to these two as *connection-level negotiations*. Transports supporting those features natively (such as QUIC) omit those negotiations. + +After successfully selecting a multiplexer, all subsequent I/O happens over *streams*. When opening streams, peers pin a protocol to that stream, by conducting *stream-level protocol negotiation*. + +At present, multistream-select 1.0 is used for both types of negotiation, but multiselect 2.0 will use dedicated mechanisms for connection bootstrapping process and stream protocol negotiation. + +## Encryption + +### Why are we using SecIO for interop? Why not for mainnet? + +SecIO has been the default encryption layer for libp2p for years. It is used in IPFS and Filecoin. And although it will be superseded shortly, it is proven to work at scale. + +SecIO is the common denominator across the various language libraries at this stage. It is widely implemented. That’s why we have chosen to use it for initial interop to minimize overhead in getting to a basic interoperability testnet. + +We won’t be using it for mainnet because, amongst other things, it requires several round trips to be sound, and doesn’t support early data (0-RTT data), a mechanism that multiselect 2.0 will leverage to reduce round trips during connection bootstrapping. 
+
+SecIO is not considered secure for the purposes of this spec.
+
+## Why are we using Noise/TLS 1.3 for mainnet?
+
+Copied from the Noise Protocol Framework website:
+
+> Noise is a framework for building crypto protocols. Noise protocols support mutual and optional authentication, identity hiding, forward secrecy, zero round-trip encryption, and other advanced features.
+
+Noise in itself does not specify a single handshake procedure, but provides a framework to build secure handshakes based on Diffie-Hellman key agreement with a variety of tradeoffs and guarantees.
+
+Noise handshakes are lightweight and simple to understand, and are used in major cryptography-centric projects like WireGuard, I2P, and Lightning. [Various](https://www.wireguard.com/papers/kobeissi-bhargavan-noise-explorer-2018.pdf) [studies](https://eprint.iacr.org/2019/436.pdf) have assessed the stated security goals of several Noise handshakes with positive results.
+
+On the other hand, TLS 1.3 is the newest, simplified iteration of TLS. Old, insecure, and obsolete ciphers and algorithms have been removed, and key agreement relies on ephemeral (elliptic-curve) Diffie-Hellman over a small set of modern groups such as X25519. Handshakes are faster (completing in a single round trip), and session resumption -- including early data (0-RTT) -- is a reality, amongst other features.
+
+Note that [TLS 1.3 is a prerequisite of the QUIC transport](https://tools.ietf.org/html/draft-ietf-quic-transport-22#section-7), although an experiment exists to integrate Noise as the QUIC crypto layer: [nQUIC](https://eprint.iacr.org/2019/028).
+
+### Why are we using encryption at all?
+
+Transport-level encryption secures message exchange and provides properties that are useful for privacy, safety, and censorship resistance. These properties are derived from the following security guarantees that apply to the entire communication between two peers:
+
+- Peer authentication: the peer I’m talking to is really who they claim to be, and who I expect them to be.
+- Confidentiality: no observer can eavesdrop on the content of our messages.
+- Integrity: the data has not been tampered with by a third-party while in transit.
+- Non-repudiation: the originating peer cannot dispute that they sent the message.
+- Depending on the chosen algorithms and mechanisms, we may obtain additional guarantees, such as non-replayability (this byte could only have been sent *now*, e.g. by using continuous HMACs) or perfect forward secrecy (in the case that a peer key is compromised, the content of past conversations will not be compromised).
+
+Note that transport-level encryption does not preclude application-level encryption or cryptography. Transport-level encryption secures the communication itself, while application-level cryptography is necessary for the application’s use cases (e.g. signatures, randomness).
+
+### Will mainnet networking be untested when it launches?
+
+Before launching mainnet, the testnet will be switched over to mainnet networking parameters, including Noise handshakes and other new protocols. This gives us an opportunity to drill coordinated network upgrades and to verify that there are no significant upgradeability gaps.
+
+## Gossipsub
+
+### Why are we using a pub/sub algorithm for block and attestation propagation?
+
+Pubsub is a technique to broadcast/disseminate data across a network rapidly. Such data is packaged in fire-and-forget messages that do not require a response from every recipient. Peers subscribed to a topic participate in the propagation of messages in that topic.
+
+The alternative is to maintain a fully connected mesh (all peers connected to each other 1:1), which scales poorly (O(n^2)).
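+
+As a back-of-the-envelope illustration (not part of the spec), the sketch below contrasts link counts in a full mesh with those in a degree-bounded gossip mesh, assuming each peer maintains roughly D mesh links (D=6 is a common gossipsub default):
+
+```python
+def full_mesh_links(n: int) -> int:
+    # every peer connects to every other peer: O(n^2) links
+    return n * (n - 1) // 2
+
+
+def gossipsub_links(n: int, degree: int = 6) -> int:
+    # each peer maintains ~degree mesh links, each shared by two peers: O(n) links
+    return n * degree // 2
+
+
+assert full_mesh_links(10000) == 49995000  # ~50M links
+assert gossipsub_links(10000) == 30000     # vs. ~30k links
+```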
+
+### Why are we using topics to segregate encodings, yet only support one encoding?
+
+For future extensibility with almost zero overhead now (besides the extra bytes in the topic name).
+
+### How do we upgrade gossip channels (e.g. changes in encoding, compression)?
+
+Such upgrades lead to fragmentation, so they’ll need to be carried out in a coordinated manner most likely during a hard fork.
+
+### Why are the topics strings and not hashes?
+
+Topics names have a hierarchical structure. In the future, gossipsub may support wildcard subscriptions (e.g. subscribe to all children topics under a root prefix). Using hashes as topic names would preclude us from leveraging such features going forward. No security guarantees are lost as a result of choosing plaintext topic names, since the domain is finite anyway.
+
+### Why are there `SHARD_SUBNET_COUNT` subnets, and why is this not defined?
+
+Depending on the number of validators, it may be more efficient to group shard subnets, and doing so might provide better stability for the gossipsub channel. The exact grouping will be dependent on more involved network tests. This constant allows for more flexibility in setting up the network topology for attestation aggregation (as aggregation should happen on each subnet).
+
+### Why are we sending entire objects in the pubsub and not just hashes?
+
+Entire objects should be sent to get the greatest propagation speeds. If only hashes are sent, then block and attestation propagation is dependent on recursive requests from each peer. In a hash-only scenario, peers could receive hashes without knowing whom to download the actual contents from. Sending entire objects ensures that they get propagated through the entire network.
+
+### Should clients gossip blocks if they *cannot* validate the proposer signature due to not yet being synced, not knowing the head block, etc.?
+
+The prohibition of unverified-block-gossiping extends to nodes that cannot verify a signature due to not being fully synced, to ensure that such (amplified) DOS attacks are not possible.
+
+### How are we going to discover peers in a gossipsub topic?
+
+Via discv5 topics. ENRs should not be used for this purpose, as they store identity, location, and capability info, not volatile advertisements.
+
+In the interoperability testnet, all peers will be subscribed to all global beacon chain topics, so discovering peers in specific shard topics will be unnecessary.
+
+## Req/Resp
+
+### Why segregate requests into dedicated protocol IDs?
+
+Requests are segregated by protocol ID to:
+
+1. Leverage protocol routing in libp2p, such that the libp2p stack will route the incoming stream to the appropriate handler. This allows each the handler function for each request type to be self-contained. For an analogy, think about how you attach HTTP handlers to a REST API server.
+2. Version requests independently. In a coarser-grained umbrella protocol, the entire protocol would have to be versioned even if just one field in a single message changed.
+3. Enable clients to select the individual requests/versions they support. It would no longer be a strict requirement to support all requests, and clients, in principle, could support a subset of equests and variety of versions.
+4. Enable flexibility and agility for clients adopting spec changes that impact the request, by signalling to peers exactly which subset of new/old requests they support.
+5. Enable clients to explicitly choose backwards compatibility at the request granularity. Without this, clients would be forced to support entire versions of the coarser request protocol.
+6. Parallelise RFCs (or ETH2 EIPs). By decoupling requests from one another, each RFC that affects the request protocol can be deployed/tested/debated independently without relying on a synchronisation point to version the general top-level protocol.
+    1. This has the benefit that clients can explicitly choose which RFCs to deploy without buying into all other RFCs that may be included in that top-level version.
+    2. Affording this level of granularity with a top-level protocol would imply creating as many variants (e.g. /protocol/43-{a,b,c,d,...}) as the Cartesian product of RFCs in flight, O(n^2).
+7. Allow us to simplify the payload of requests. Request IDs and method IDs no longer need to be sent. The encoding/request type and version can all be handled by the framework.
+
+CAVEAT: the protocol negotiation component in the current version of libp2p is called multistream-select 1.0. It is somewhat naïve and introduces overhead on every request when negotiating streams, although implementation-specific optimizations are possible to save this cost. Multiselect 2.0 will remove this overhead by memoizing previously selected protocols and modelling shared protocol tables. Fortunately, this req/resp protocol is not expected to be the network bottleneck, so the additional overhead is not expected to hinder interop testing. More info is to be released from the libp2p community in the coming weeks.
+
+### Why are messages length-prefixed with a protobuf varint in the SSZ encoding?
+
+In stream-oriented protocols, we need to delimit messages from one another, so that the reader knows where one message ends and the next one starts. Length-prefixing is an effective solution. Alternatively, one could set a delimiter char/string, but this can readily cause ambiguity if the message itself may contain the delimiter. It also introduces another set of edge cases to model for, thus causing unnecessary complexity, especially if messages are to be compressed (and thus mutated beyond our control).
+
+That said, in our case, streams are single-use. libp2p streams are full-duplex, and each party is responsible for closing their write side (like in TCP). We therefore use stream closure to mark the end of a request.
+
+Nevertheless, messages are still length-prefixed to prevent DOS attacks where malicious actors send large amounts of data disguised as a request. A length prefix allows clients to set a maximum limit, and once that limit is read, the client can cease reading and disconnect the stream. This allows a client to determine the exact length of the packet being sent, and it enables the client to reset the stream early if the other party signals that they intend to send too much data.
+
+[Protobuf varint](https://developers.google.com/protocol-buffers/docs/encoding#varints) is an efficient technique to encode variable-length ints. Instead of reserving a fixed-size field of as many bytes as necessary to convey the maximum possible value, this field is elastic in exchange for 1-bit overhead per byte.
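+
+To make the prefix format concrete, here is a minimal sketch of protobuf varint encoding/decoding in Python (illustrative only; `encode_varint`/`decode_varint` are our own helper names, not part of this spec):
+
+```python
+from typing import Tuple
+
+
+def encode_varint(value: int) -> bytes:
+    # little-endian base-128: low 7 bits per byte, MSB set while more bytes follow
+    out = bytearray()
+    while True:
+        byte = value & 0x7F
+        value >>= 7
+        if value:
+            out.append(byte | 0x80)
+        else:
+            out.append(byte)
+            return bytes(out)
+
+
+def decode_varint(data: bytes) -> Tuple[int, int]:
+    # returns (value, number of bytes consumed)
+    value = shift = 0
+    for i, byte in enumerate(data):
+        value |= (byte & 0x7F) << shift
+        if not byte & 0x80:
+            return value, i + 1
+        shift += 7
+    raise ValueError("incomplete varint")
+
+
+assert encode_varint(300) == b"\xac\x02"  # example from the protobuf docs
+assert decode_varint(b"\xac\x02") == (300, 2)
+```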
+
+### Why do we version protocol strings with ordinals instead of semver?
+
+Using semver for network protocols is confusing. It is never clear what a change in a field, even if backwards compatible on deserialisation, actually implies. Network protocol agreement should be explicit. Imagine two peers:
+
+- Peer A supporting v1.1.1 of protocol X.
+- Peer B supporting v1.1.2 of protocol X.
+
+These two peers should never speak to each other because the results can be unpredictable. This is an oversimplification: imagine the same problem with a set of 10 possible versions. We now have 10^2 (100) possible outcomes that peers need to model for. The resulting complexity is unwieldy.
+
+For this reason, we rely on negotiation of explicit, verbatim protocols. In the above case, peer B would provide backwards compatibility by supporting and advertising both v1.1.1 and v1.1.2 of the protocol.
+
+Semver would therefore be relegated to conveying expectations at the human level, and it wouldn’t do a good job there either, because it’s unclear whether "backwards-compatibility" and "breaking change" apply only to the wire schema, to behaviour, etc.
+
+Hence, we take semver out of the picture and replace it with ordinals that require explicit agreement and do not mandate a specific policy for changes.
+
+### Why is it called Req/Resp and not RPC?
+
+Req/Resp is used to avoid confusion with JSON-RPC and similar user-client interaction mechanisms.
+
+## Discovery
+
+### Why are we using discv5 and not libp2p Kademlia DHT?
+
+discv5 is a standalone protocol, running on UDP on a dedicated port, meant for peer and service discovery only. discv5 supports self-certified, flexible peer records (ENRs) and topic-based advertisement, both of which are, or will be, requirements in this context.
+
+On the other hand, libp2p Kademlia DHT is a fully-fledged DHT protocol/implementation with content routing and storage capabilities, both of which are irrelevant in this context.
+
+We assume that ETH1 nodes will evolve to support discv5. By sharing the discovery network between ETH1 and ETH2, we benefit from the additive effect on network size that enhances resilience and resistance against certain attacks, to which smaller networks are more vulnerable. It should also help light clients of both networks find nodes with specific capabilities.
+
+discv5 is in the process of being audited.
+
+### What is the difference between an ENR and a multiaddr, and why are we using ENRs?
+
+Ethereum Node Records are self-certified node records. Nodes craft and disseminate ENRs for themselves, proving authorship via a cryptographic signature. ENRs are sequentially indexed, enabling conflicts to be resolved.
+
+ENRs are key-value records with string-indexed ASCII keys. They can store arbitrary information, but EIP-778 specifies a pre-defined dictionary, including IPv4 and IPv6 addresses, secp256k1 public keys, etc.
+
+Comparing ENRs and multiaddrs is like comparing apples and oranges. ENRs are self-certified containers of identity, addresses, and metadata about a node. Multiaddrs are address strings with the peculiarity that they’re self-describing, composable, and future-proof. An ENR can contain multiaddrs, and multiaddrs can be derived securely from the fields of an authenticated ENR.
+
+discv5 uses ENRs and we will presumably need to:
+
+1. Add `multiaddr` to the dictionary, so that nodes can advertise their multiaddr under a reserved namespace in ENRs. – and/or –
+2. Define a bi-directional conversion function between multiaddrs and the corresponding denormalized fields in an ENR (ip, ip6, tcp, tcp6, etc.), for compatibility with nodes that do not support multiaddr natively (e.g. ETH1 nodes).
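+
+As an illustration of option 2, here is a minimal sketch (an assumed helper, not part of any spec) deriving self-describing multiaddrs from the denormalized EIP-778 address fields:
+
+```python
+def enr_fields_to_multiaddrs(fields: dict) -> list:
+    # derive multiaddr strings from the denormalized ENR address/port fields
+    multiaddrs = []
+    if "ip" in fields and "tcp" in fields:
+        multiaddrs.append(f"/ip4/{fields['ip']}/tcp/{fields['tcp']}")
+    if "ip6" in fields and "tcp6" in fields:
+        multiaddrs.append(f"/ip6/{fields['ip6']}/tcp/{fields['tcp6']}")
+    return multiaddrs
+
+
+assert enr_fields_to_multiaddrs({"ip": "192.0.2.1", "tcp": 9000}) == ["/ip4/192.0.2.1/tcp/9000"]
+```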
+
+## Compression/Encoding
+
+### Why are we using SSZ for encoding?
+
+SSZ is used at the consensus layer, and all implementations should already support SSZ encoding/decoding, requiring no further dependencies to be added to client implementations. This makes it a natural choice for serializing objects to be sent across the wire. The actual data in most protocols will be further compressed for efficiency.
+
+SSZ has well-defined schemas for consensus objects (typically sent across the wire), reducing any serialization schema data that needs to be sent. It also defines all the types required by this network specification.
+
+### Why are we compressing, and at which layers?
+
+We compress on the wire to achieve smaller payloads per message, which, in aggregate, result in higher efficiency, better utilisation of available bandwidth, and overall reduction in network-wide traffic overhead.
+
+At this time, libp2p does not have an out-of-the-box compression feature that can be dynamically negotiated and layered atop connections and streams, but this will be raised in the libp2p community for consideration.
+
+This is a non-trivial feature because the behaviour of network IO loops, kernel buffers, chunking, and packet fragmentation, amongst others, needs to be taken into account. libp2p streams are unbounded streams, whereas compression algorithms work best on bounded byte streams of which we have some prior knowledge.
+
+Compression tends not to be a one-size-fits-all problem. Lots of variables need careful evaluation, and generic approaches/choices lead to poor size shavings, which may even be counterproductive when factoring in the CPU and memory tradeoff.
+
+For all these reasons, generically negotiating compression algorithms may be treated as a research problem within the libp2p community, one we’re happy to tackle in the medium term.
+
+At this stage, the wisest choice is to consider libp2p a messenger of bytes, and to make the application layer participate in compressing those bytes. This looks different depending on the interaction layer:
+
+- Gossip domain: since gossipsub has a framing protocol and exposes an API, we compress the payload (when dictated by the encoding token in the topic name) prior to publishing the message via the API. No length prefixing is necessary because protobuf takes care of bounding the field in the serialised form.
+- Req/Resp domain: since we define custom protocols that operate on byte streams, implementers are encouraged to encapsulate the encoding and compression logic behind `MessageReader` and `MessageWriter` components/strategies that can be layered on top of the raw byte streams.
+
+### Why are we using Snappy for compression?
+
+Snappy is used in Ethereum 1.0. It is well-maintained by Google, has good benchmarks, and can calculate the size of the uncompressed object without inflating it in memory. This prevents DOS vectors where large uncompressed data is sent.
+
+### Can I get access to unencrypted bytes on the wire for debugging purposes?
+
+Yes, you can add loggers in your libp2p protocol handlers to log incoming and outgoing messages. It is recommended to use programming design patterns to encapsulate the logging logic cleanly.
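+
+For example, here is a minimal sketch of such encapsulation in Python (illustrative only; the decorator and handler signature are our own assumptions, not a libp2p API):
+
+```python
+import logging
+from functools import wraps
+
+logger = logging.getLogger("p2p.wire")
+
+
+def log_stream_handler(handler):
+    # wrap a request handler so raw request/response bytes are traced in one place
+    @wraps(handler)
+    def wrapped(request_bytes: bytes) -> bytes:
+        logger.debug("-> %d bytes: %s", len(request_bytes), request_bytes.hex())
+        response_bytes = handler(request_bytes)
+        logger.debug("<- %d bytes: %s", len(response_bytes), response_bytes.hex())
+        return response_bytes
+    return wrapped
+
+
+@log_stream_handler
+def echo_handler(request_bytes: bytes) -> bytes:
+    return request_bytes
+```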
+ +If your libp2p library relies on frameworks/runtimes such as Netty (jvm) or Node.js (javascript), you can use logging facilities in those frameworks/runtimes to enable message tracing. + +For specific ad-hoc testing scenarios, you can use the [plaintext/2.0.0 secure channel](https://github.com/libp2p/specs/blob/master/plaintext/README.md) (which is essentially no-op encryption or message authentication), in combination with tcpdump or Wireshark to inspect the wire. + +# libp2p Implementations Matrix + +This section will soon contain a matrix showing the maturity/state of the libp2p features required by this spec across the languages in which ETH 2.0 clients are being developed. diff --git a/specs/networking/rpc-interface.md b/specs/networking/rpc-interface.md deleted file mode 100644 index be154075c..000000000 --- a/specs/networking/rpc-interface.md +++ /dev/null @@ -1,283 +0,0 @@ -# Eth 2.0 Networking Spec - RPC Interface - -## Abstract - -The Ethereum 2.0 networking stack uses two modes of communication: a broadcast protocol that gossips information to interested parties via GossipSub, and an RPC protocol that retrieves information from specific clients. This specification defines the RPC protocol. - -The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL", NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). - -## Dependencies - -This specification assumes familiarity with the [Messaging](./messaging.md), [Node Identification](./node-identification.md), and [Beacon Chain](../core/0_beacon-chain.md) specifications. - -# Specification - -## Message schemas - -Message body schemas are notated like this: - -``` -( - field_name_1: type - field_name_2: type -) -``` - -Embedded types are serialized as SSZ Containers unless otherwise noted. - -All referenced data structures can be found in the [Beacon Chain](../core/0_beacon-chain.md#data-structures) specification. - -## `libp2p` protocol names - -A "Protocol ID" in `libp2p` parlance refers to a human-readable identifier `libp2p` uses in order to identify sub-protocols and stream messages of different types over the same connection. Peers exchange supported protocol IDs via the `Identify` protocol upon connection. When opening a new stream, peers pin a particular protocol ID to it, and the stream remains contextualized thereafter. Since messages are sent inside a stream, they do not need to bear the protocol ID. - -## RPC-over-`libp2p` - -To facilitate RPC-over-`libp2p`, a single protocol name is used: `/eth/serenity/beacon/rpc/1`. The version number in the protocol name is neither backwards or forwards compatible, and will be incremented whenever changes to the below structures are required. - -Remote method calls are wrapped in a "request" structure: - -``` -( - id: uint64 - method_id: uint16 - body: (message_body...) -) -``` - -and their corresponding responses are wrapped in a "response" structure: - -``` -( - id: uint64 - response_code: uint16 - result: bytes -) -``` - -A union type is used to determine the contents of the `body` field in the request structure. Each "body" entry in the RPC calls below corresponds to one subtype in the `body` type union. - -The details of the RPC-Over-`libp2p` protocol are similar to [JSON-RPC 2.0](https://www.jsonrpc.org/specification). Specifically: - -1. The `id` member is REQUIRED. -2. The `id` member in the response MUST be the same as the value of the `id` in the request. -3. 
The `id` member MUST be unique within the context of a single connection. Monotonically increasing `id`s are RECOMMENDED. -4. The `method_id` member is REQUIRED. -5. The `result` member is REQUIRED on success. -6. The `result` member is OPTIONAL on errors, and MAY contain additional information about the error. -7. `response_code` MUST be `0` on success. - -Structuring RPC requests in this manner allows multiple calls and responses to be multiplexed over the same stream without switching. Note that this implies that responses MAY arrive in a different order than requests. - -The "method ID" fields in the below messages refer to the `method` field in the request structure above. - -The first 1,000 values in `response_code` are reserved for system use. The following response codes are predefined: - -1. `0`: No error. -2. `10`: Parse error. -2. `20`: Invalid request. -3. `30`: Method not found. -4. `40`: Server error. - -### Alternative for non-`libp2p` clients - -Since some clients are waiting for `libp2p` implementations in their respective languages. As such, they MAY listen for raw TCP messages on port `9000`. To distinguish RPC messages from other messages on that port, a byte prefix of `ETH` (`0x455448`) MUST be prepended to all messages. This option will be removed once `libp2p` is ready in all supported languages. - -## Messages - -### Hello - -**Method ID:** `0` - -**Body**: - -``` -( - network_id: uint8 - chain_id: uint64 - finalized_root: bytes32 - finalized_epoch: uint64 - best_root: bytes32 - best_slot: uint64 -) -``` - -Clients exchange `hello` messages upon connection, forming a two-phase handshake. The first message the initiating client sends MUST be the `hello` message. In response, the receiving client MUST respond with its own `hello` message. - -Clients SHOULD immediately disconnect from one another following the handshake above under the following conditions: - -1. If `network_id` belongs to a different chain, since the client definitionally cannot sync with this client. -2. If the `finalized_root` shared by the peer is not in the client's chain at the expected epoch. For example, if Peer 1 in the diagram below has `(root, epoch)` of `(A, 5)` and Peer 2 has `(B, 3)`, Peer 1 would disconnect because it knows that `B` is not the root in their chain at epoch 3: - -``` - Root A - - +---+ - |xxx| +----+ Epoch 5 - +-+-+ - ^ - | - +-+-+ - | | +----+ Epoch 4 - +-+-+ -Root B ^ - | -+---+ +-+-+ -|xxx+<---+--->+ | +----+ Epoch 3 -+---+ | +---+ - | - +-+-+ - | | +-----------+ Epoch 2 - +-+-+ - ^ - | - +-+-+ - | | +-----------+ Epoch 1 - +---+ -``` - -Once the handshake completes, the client with the higher `finalized_epoch` or `best_slot` (if the clients have equal `finalized_epoch`s) SHOULD request beacon block roots from its counterparty via `beacon_block_roots` (i.e. RPC method `10`). - -### Goodbye - -**Method ID:** `1` - -**Body:** - -``` -( - reason: uint64 -) -``` - -Client MAY send `goodbye` messages upon disconnection. The reason field MAY be one of the following values: - -- `1`: Client shut down. -- `2`: Irrelevant network. -- `3`: Fault/error. - -Clients MAY define custom goodbye reasons as long as the value is larger than `1000`. - -### Get status - -**Method ID:** `2` - -**Request body:** - -``` -( - sha: bytes32 - user_agent: bytes - timestamp: uint64 -) -``` - -**Response body:** - -``` -( - sha: bytes32 - user_agent: bytes - timestamp: uint64 -) -``` - -Returns metadata about the remote node. 
- -### Request beacon block roots - -**Method ID:** `10` - -**Request body** - -``` -( - start_slot: uint64 - count: uint64 -) -``` - -**Response body:** - -``` -# BlockRootSlot -( - block_root: bytes32 - slot: uint64 -) - -( - roots: []BlockRootSlot -) -``` - -Requests a list of block roots and slots from the peer. The `count` parameter MUST be less than or equal to `32768`. The slots MUST be returned in ascending slot order. - -### Beacon block headers - -**Method ID:** `11` - -**Request body** - -``` -( - start_root: HashTreeRoot - start_slot: uint64 - max_headers: uint64 - skip_slots: uint64 -) -``` - -**Response body:** - -``` -( - headers: []BeaconBlockHeader -) -``` - -Requests beacon block headers from the peer starting from `(start_root, start_slot)`. The response MUST contain no more than `max_headers` headers. `skip_slots` defines the maximum number of slots to skip between blocks. For example, requesting blocks starting at slots `2` a `skip_slots` value of `1` would return the blocks at `[2, 4, 6, 8, 10]`. In cases where a slot is empty for a given slot number, the closest previous block MUST be returned. For example, if slot `4` were empty in the previous example, the returned array would contain `[2, 3, 6, 8, 10]`. If slot three were further empty, the array would contain `[2, 6, 8, 10]`—i.e. duplicate blocks MUST be collapsed. A `skip_slots` value of `0` returns all blocks. - -The function of the `skip_slots` parameter helps facilitate light client sync - for example, in [#459](https://github.com/ethereum/eth2.0-specs/issues/459) - and allows clients to balance the peers from whom they request headers. Clients could, for instance, request every 10th block from a set of peers where each peer has a different starting block in order to populate block data. - -### Beacon block bodies - -**Method ID:** `12` - -**Request body:** - -``` -( - block_roots: []HashTreeRoot -) -``` - -**Response body:** - -``` -( - block_bodies: []BeaconBlockBody -) -``` - -Requests the `block_bodies` associated with the provided `block_roots` from the peer. Responses MUST return `block_roots` in the order provided in the request. If the receiver does not have a particular `block_root`, it must return a zero-value `block_body` (i.e. a `block_body` container with all zero fields). - -### Beacon chain state - -*Note*: This section is preliminary, pending the definition of the data structures to be transferred over the wire during fast sync operations. - -**Method ID:** `13` - -**Request body:** - -``` -( - hashes: []HashTreeRoot -) -``` - -**Response body:** TBD - -Requests contain the hashes of Merkle tree nodes that when merkleized yield the block's `state_root`. - -The response will contain the values that, when hashed, yield the hashes inside the request body. From c108444c06f09a9d2f11d98e3934c632d647b53d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 1 Aug 2019 15:47:11 +0100 Subject: [PATCH 071/130] add table of contents; amend heading level. --- specs/networking/p2p-interface.md | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 72f5c0fd6..3b5da6eb0 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -10,7 +10,33 @@ It consists of four main sections: 4. An analysis of the maturity/state of the libp2p features required by this spec across the languages in which ETH 2.0 clients are being developed. 
## Table of Contents -[TOC] + + + + + +- [Network Fundamentals](#network-fundamentals) + - [Transport](#transport) + - [Encryption and identification](#encryption-and-identification) + - [Protocol Negotiation](#protocol-negotiation) + - [Multiplexing](#multiplexing) +- [ETH2 network interaction domains](#eth2-network-interaction-domains) + - [Constants](#constants) + - [The gossip domain: gossipsub](#the-gossip-domain-gossipsub) + - [The discovery domain: discv5](#the-discovery-domain-discv5) + - [The Req/Resp domain](#the-reqresp-domain) +- [Design Decision Rationale](#design-decision-rationale) + - [Transport](#transport-1) + - [Multiplexing](#multiplexing-1) + - [Protocol Negotiation](#protocol-negotiation-1) + - [Encryption](#encryption) + - [Gossipsub](#gossipsub) + - [Req/Resp](#reqresp) + - [Discovery](#discovery) + - [Compression/Encoding](#compressionencoding) +- [libp2p Implementations Matrix](#libp2p-implementations-matrix) + + # Network Fundamentals @@ -529,7 +555,7 @@ We won’t be using it for mainnet because, amongst other things, it requires se SecIO is not considered secure for the purposes of this spec. -## Why are we using Noise/TLS 1.3 for mainnet? +### Why are we using Noise/TLS 1.3 for mainnet? Copied from the Noise Protocol Framework website: From 55f5f106f175d64d48befc910025f1f9c33b39c1 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 10:56:31 -0400 Subject: [PATCH 072/130] Updated type checkers for generalized index functions. --- specs/light_client/merkle_proofs.md | 44 +++++++++++++---------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index dae2a1704..6107e459c 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -17,12 +17,6 @@ -## Constants - -| Name | Value | -| - | - | -| `LENGTH_FLAG` | `2**64 - 1` | - ## Generalized Merkle tree index In a binary Merkle tree, we define a "generalized index" of a node as `2**depth + index`. Visually, this looks as follows: @@ -38,7 +32,8 @@ Note that the generalized index has the convenient property that the two childre ```python def merkle_tree(leaves: List[Bytes32]) -> List[Bytes32]: - o = [0] * len(leaves) + leaves + padded_length = next_power_of_2(len(leaves)) + o = [ZERO_HASH] * padded_length + leaves + [ZERO_HASH] * (padded_length - len(leaves)) for i in range(len(leaves) - 1, 0, -1): o[i] = hash(o[i * 2] + o[i * 2 + 1]) return o @@ -64,27 +59,24 @@ y_data_root len(y) We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values, in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) `[0]`, `[1, 2**64-1]`, `[1, 5, 0]`. ```python -def item_length(typ: Type) -> int: +def item_length(typ: SSZType) -> int: """ Returns the number of bytes in a basic type, or 32 (a full hash) for compound types. 
""" - if typ == bool: - return 1 - elif issubclass(typ, uint): + if issubclass(typ, BasicValue): return typ.byte_len else: return 32 -def get_elem_type(typ: Type, index: int) -> Type: +def get_elem_type(typ: ComplexType, index: int) -> Type: """ Returns the type of the element of an object of the given type with the given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) """ - return typ.get_fields_dict()[index] if is_container_type(typ) else typ.elem_type + return typ.get_fields()[key] if issubclass(typ, Container) else typ.elem_type - -def get_chunk_count(typ: Type) -> int: +def chunk_count(typ: SSZType) -> int: """ Returns the number of hashes needed to represent the top-level elements in the given type (eg. `x.foo` or `x[7]` but not `x[7].bar` or `x.foo.baz`). In all cases except lists/vectors @@ -92,24 +84,28 @@ def get_chunk_count(typ: Type) -> int: hash. For lists/vectors of basic types, it is often fewer because multiple basic elements can be packed into one 32-byte chunk. """ - if is_basic_type(typ): + if issubclass(typ, BasicValue): return 1 - elif issubclass(typ, (List, Vector, Bytes, BytesN)): + elif issubclass(typ, Bits): + return (typ.length + 255) // 256 + elif issubclass(typ, Elements): return (typ.length * item_length(typ.elem_type) + 31) // 32 - else: + elif issubclass(typ, Container): return len(typ.get_fields()) + else: + raise Exception(f"Type not supported: {typ}") -def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int]: +def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, int]: """ Returns three variables: (i) the index of the chunk in which the given element of the item is represented, (ii) the starting byte position, (iii) the ending byte position. For example for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) """ - if issubclass(typ, (List, Vector, Bytes, BytesN)): + if issubclass(typ, Elements): start = index * item_length(typ.elem_type) return start // 32, start % 32, start % 32 + item_length(typ.elem_type) - elif is_container_type(typ): + elif issubclass(typ, Container): return typ.get_field_names().index(index), 0, item_length(get_elem_type(typ, index)) else: raise Exception("Only lists/vectors/containers supported") @@ -122,12 +118,12 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized """ root = 1 for p in path: - assert not is_basic_type(typ) # If we descend to a basic type, the path cannot continue further + assert not issubclass(typ, BasicValue) # If we descend to a basic type, the path cannot continue further if p == '__len__': typ, root = uint256, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None else: pos, _, _ = get_item_position(typ, p) - root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos + root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(chunk_count(typ)) + pos typ = get_elem_type(typ, p) return root ``` @@ -197,7 +193,7 @@ def get_branch_indices(tree_index: int) -> List[int]: def get_expanded_indices(indices: List[int]) -> List[int]: """ Get the generalized indices of all chunks in the tree needed to prove the chunks with the given - generalized indices. + generalized indices, including the leaves. 
""" branches = set() for index in indices: From 1ba03b4c33a6ade51dbd60dae9a6a15acd7be531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 1 Aug 2019 15:56:53 +0100 Subject: [PATCH 073/130] gossip domain: clarify why we use plaintext topic names. --- specs/networking/p2p-interface.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 3b5da6eb0..4480773b3 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -604,7 +604,11 @@ Such upgrades lead to fragmentation, so they’ll need to be carried out in a co ### Why are the topics strings and not hashes? -Topics names have a hierarchical structure. In the future, gossipsub may support wildcard subscriptions (e.g. subscribe to all children topics under a root prefix). Using hashes as topic names would preclude us from leveraging such features going forward. No security guarantees are lost as a result of choosing plaintext topic names, since the domain is finite anyway. +Topics names have a hierarchical structure. In the future, gossipsub may support wildcard subscriptions (e.g. subscribe to all children topics under a root prefix) by way of prefix matching. Enforcing hashes for topic names would preclude us from leveraging such features going forward. + +No security or privacy guarantees are lost as a result of choosing plaintext topic names, since the domain is finite anyway, and calculating a digest's preimage would be trivial. + +Furthermore, the ETH2 topic names are shorter their digest equivalents (asuming SHA-256 hash), so hashing topics would bloat messages unnecessarily. ### Why are there `SHARD_SUBNET_COUNT` subnets, and why is this not defined? From 3d4dde412bdf9e5817e7f63cdb3d7cdb9beddc54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 1 Aug 2019 16:04:59 +0100 Subject: [PATCH 074/130] document doctoc command for posterity. 
--- specs/networking/p2p-interface.md | 1 + 1 file changed, 1 insertion(+) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 4480773b3..20fc803c8 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -11,6 +11,7 @@ It consists of four main sections: ## Table of Contents + From 725bdf822340db5048c161b54192f457bcde1ba2 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 11:40:40 -0400 Subject: [PATCH 075/130] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 6107e459c..16cbd2908 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -120,7 +120,7 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized for p in path: assert not issubclass(typ, BasicValue) # If we descend to a basic type, the path cannot continue further if p == '__len__': - typ, root = uint256, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None + typ, root = uint64, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None else: pos, _, _ = get_item_position(typ, p) root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(chunk_count(typ)) + pos From 02bb92e71455adaa7da101563a6c367efe9e1cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 1 Aug 2019 16:57:04 +0100 Subject: [PATCH 076/130] fmt. --- specs/networking/p2p-interface.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 20fc803c8..66b1fa694 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -313,8 +313,8 @@ The responder SHOULD send a response promptly, starting with a **single-byte** r It can have one of the following values: - 0: **Success** -- a normal response follows, with contents matching the expected message schema and encoding specified in the request. -- 1: **InvalidRequest** -- the contents of the request are semantically invalid, or the payload is malformed, or could not be understood. The response payload adheres to the ErrorMessage schema (described below). -- 2: **ServerError** -- the responder encountered an error while processing the request. The response payload adheres to the ErrorMessage schema (described below). +- 1: **InvalidRequest** -- the contents of the request are semantically invalid, or the payload is malformed, or could not be understood. The response payload adheres to the `ErrorMessage` schema (described below). +- 2: **ServerError** -- the responder encountered an error while processing the request. The response payload adheres to the `ErrorMessage` schema (described below). Clients MAY use response codes above `128` to indicate alternative, erroneous request-specific responses. 
From 8563dbf5c0b21c76b41b0c473911bc19313e5204 Mon Sep 17 00:00:00 2001 From: protolambda Date: Thu, 1 Aug 2019 22:03:40 +0200 Subject: [PATCH 077/130] make ssz_static output roots to roots.yaml instead of meta --- specs/test_formats/ssz_static/core.md | 8 +++----- test_generators/ssz_static/main.py | 9 ++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/specs/test_formats/ssz_static/core.md b/specs/test_formats/ssz_static/core.md index 1816e7d4d..a7301a3c8 100644 --- a/specs/test_formats/ssz_static/core.md +++ b/specs/test_formats/ssz_static/core.md @@ -18,16 +18,14 @@ One can iterate over the handlers, and select the type based on the handler name Suites are then the same format, but each specialized in one randomization mode. Some randomization modes may only produce a single test case (e.g. the all-zeroes case). -The output parts are: `meta.yaml`, `serialized.ssz`, `value.yaml` +The output parts are: `roots.yaml`, `serialized.ssz`, `value.yaml` -### `meta.yaml` - -For non-container SSZ type: +### `roots.yaml` ```yaml root: bytes32 -- string, hash-tree-root of the value, hex encoded, with prefix 0x signing_root: bytes32 -- string, signing-root of the value, hex encoded, with prefix 0x. - Optional, present if type is a container and ends with a ``signature`` field. + *Optional*, present if type is a container and ends with a ``signature`` field. ``` ### `serialized.ssz` diff --git a/test_generators/ssz_static/main.py b/test_generators/ssz_static/main.py index c9c45a5a0..32178cfe0 100644 --- a/test_generators/ssz_static/main.py +++ b/test_generators/ssz_static/main.py @@ -21,9 +21,12 @@ def create_test_case(rng: Random, typ, mode: random_value.RandomizationMode, cha value = random_value.get_random_ssz_object(rng, typ, MAX_BYTES_LENGTH, MAX_LIST_LENGTH, mode, chaos) yield "value", "data", encode.encode(value) yield "serialized", "ssz", serialize(value) - yield "root", "meta", '0x' + hash_tree_root(value).hex() - if hasattr(value, "signature"): - yield "signing_root", "meta", '0x' + signing_root(value).hex() + roots_data = { + "root": '0x' + hash_tree_root(value).hex() + } + if isinstance(value, Container) and hasattr(value, "signature"): + roots_data["signing_root"] = '0x' + signing_root(value).hex() + yield "roots", "data", roots_data def get_spec_ssz_types(): From 12900b2b4c0ceb489905b83e35788f3674692167 Mon Sep 17 00:00:00 2001 From: protolambda Date: Thu, 1 Aug 2019 22:40:10 +0200 Subject: [PATCH 078/130] handle empty test ouputs, and split out output format functions --- test_libs/gen_helpers/gen_base/gen_runner.py | 54 ++++++++++++-------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/test_libs/gen_helpers/gen_base/gen_runner.py b/test_libs/gen_helpers/gen_base/gen_runner.py index 1eb6bac56..32f3594b3 100644 --- a/test_libs/gen_helpers/gen_base/gen_runner.py +++ b/test_libs/gen_helpers/gen_base/gen_runner.py @@ -1,7 +1,7 @@ import argparse from pathlib import Path import sys -from typing import Iterable +from typing import Iterable, AnyStr, Any, Callable from ruamel.yaml import ( YAML, @@ -124,34 +124,48 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]): print(f'Generating test: {case_dir}') try: - case_dir.mkdir(parents=True, exist_ok=True) + def output_part(out_kind: str, name: str, fn: Callable[[Path, ], None]): + # make sure the test case directory is created before any test part is written. 
+ case_dir.mkdir(parents=True, exist_ok=True) + try: + fn(case_dir) + except IOError as e: + sys.exit(f'Error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}') + + written_part = False meta = dict() for (name, out_kind, data) in test_case.case_fn(): + written_part = True if out_kind == "meta": meta[name] = data if out_kind == "data": - try: - out_path = case_dir / Path(name + '.yaml') - with out_path.open(file_mode) as f: - yaml.dump(data, f) - except IOError as e: - sys.exit(f'Error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}') + output_part("data", name, dump_yaml_fn(data, name, file_mode, yaml)) if out_kind == "ssz": - try: - out_path = case_dir / Path(name + '.ssz') - with out_path.open(file_mode + 'b') as f: # write in raw binary mode - f.write(data) - except IOError as e: - sys.exit(f'Error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}') + output_part("ssz", name, dump_ssz_fn(data, name, file_mode)) # Once all meta data is collected (if any), write it to a meta data file. if len(meta) != 0: - try: - out_path = case_dir / Path('meta.yaml') - with out_path.open(file_mode) as f: - yaml.dump(meta, f) - except IOError as e: - sys.exit(f'Error when dumping test "{case_dir}" meta data": {e}') + written_part = True + output_part("data", "meta", dump_yaml_fn(meta, "meta", file_mode, yaml)) + + if not written_part: + print(f"test case {case_dir} did not produce any test case parts") except Exception as e: print(f"ERROR: failed to generate vector(s) for test {case_dir}: {e}") print(f"completed {generator_name}") + + +def dump_yaml_fn(data: Any, name: str, file_mode: str, yaml_encoder: YAML): + def dump(case_path: Path): + out_path = case_path / Path(name + '.yaml') + with out_path.open(file_mode) as f: + yaml_encoder.dump(data, f) + return dump + + +def dump_ssz_fn(data: AnyStr, name: str, file_mode: str): + def dump(case_path: Path): + out_path = case_path / Path(name + '.ssz') + with out_path.open(file_mode + 'b') as f: # write in raw binary mode + f.write(data) + return dump From 1b852adef1d48e51e38d5f5c9f760345a766202d Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:11:30 -0400 Subject: [PATCH 079/130] Simplified merkle multiproofs --- specs/light_client/merkle_proofs.md | 134 +++++++++++++--------------- 1 file changed, 63 insertions(+), 71 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 16cbd2908..8f9b14fb5 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -157,11 +157,32 @@ def get_generalized_index_length(index: GeneralizedIndex) -> int: #### `get_generalized_index_bit` ```python -def get_generalized_index_bit(index: GeneralizedIndex, bit: int) -> bool: +def get_generalized_index_bit(index: GeneralizedIndex, position: int) -> bool: """ - Returns the i'th bit of a generalized index. + Returns the given bit of a generalized index. 
""" - return (index & (1 << bit)) > 0 + return (index & (1 << position)) > 0 +``` + +#### `generalized_index_sibling` + +```python +def generalized_index_sibling(index: GeneralizedIndex) -> GeneralizedIndex: + return index ^ 1 +``` + +#### `generalized_index_child` + +```python +def generalized_index_child(index: GeneralizedIndex, right_side: bool) -> GeneralizedIndex: + return index * 2 + right_side +``` + +#### `generalized_index_parent` + +```python +def generalized_index_parent(index: GeneralizedIndex) -> GeneralizedIndex: + return index // 2 ``` ## Merkle multiproofs @@ -180,38 +201,57 @@ x x . . . . x * First, we provide a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: ``` -def get_branch_indices(tree_index: int) -> List[int]: +def get_branch_indices(tree_index: GeneralizedIndex) -> List[GeneralizedIndex]: """ Get the generalized indices of the sister chunks along the path from the chunk with the given tree index to the root. """ - o = [tree_index ^ 1] + o = [generalized_index_sibling(tree_index)] while o[-1] > 1: - o.append((o[-1] // 2) ^ 1) + o.append(generalized_index_sibling(generalized_index_parent(o[-1]))) return o[:-1] -def get_expanded_indices(indices: List[int]) -> List[int]: +def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex]: """ - Get the generalized indices of all chunks in the tree needed to prove the chunks with the given - generalized indices, including the leaves. + Get the generalized indices of all "extra" chunks in the tree needed to prove the chunks with the given + generalized indices. Note that the decreasing order is chosen deliberately to ensure equivalence to the + order of hashes in a regular single-item Merkle proof in the single-item case. """ - branches = set() + all_indices = set() for index in indices: - branches = branches.union(set(get_branch_indices(index) + [index])) - return sorted([x for x in branches if x*2 not in branches or x*2+1 not in branches])[::-1] + all_indices = all_indices.union(set(get_branch_indices(index) + [index])) + + return sorted([ + x for x in all_indices if not + (generalized_index_child(x, 0) in all_indices and generalized_index_child(x, 1) in all_indices) and not + (x in indices) + ])[::-1] ``` -Generating a proof that covers paths `p1 ... pn` is simply a matter of taking the chunks in the SSZ hash tree with generalized indices `get_expanded_indices([p1 ... pn])`. - -We now provide the bulk of the proving machinery, a function that takes a `{generalized_index: chunk}` map and fills in chunks that can be inferred (inferring the parent by hashing its two children): +Now we provide the Merkle proof verification functions. First, for single item proofs: ```python -def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]: - """ - Fills in chunks that can be inferred from other chunks. For a set of chunks that constitutes - a valid proof, this includes the root (generalized index 1). 
- """ - objects = {k: v for k, v in objects.items()} +def verify_merkle_proof(leaf: Hash, proof: Sequence[Hash], index: GeneralizedIndex, root: Hash) -> bool: + assert len(proof) == get_generalized_index_length(index) + for i, h in enumerate(proof): + if get_generalized_index_bit(index, i): + leaf = hash(h + leaf) + else: + leaf = hash(leaf + h) + return leaf == root +``` + +Now for multi-item proofs: + +```python +def verify_merkle_multiproof(leaves: Sequence[Hash], proof: Sequence[Hash], indices: Sequence[GeneralizedIndex], root: Hash) -> bool: + assert len(leaves) == len(indices) + helper_indices = get_helper_indices(indices) + assert len(proof) == len(helper_indices) + objects = { + **{index:node for index, node in zip(indices, leaves)}, + **{index:node for index, node in zip(helper_indices, proof)} + } keys = sorted(objects.keys())[::-1] pos = 0 while pos < len(keys): @@ -220,55 +260,7 @@ def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]: objects[k // 2] = hash(objects[k & -2] + objects[k | 1]) keys.append(k // 2) pos += 1 - # Completeness and consistency check - assert 1 in objects - for k in objects: - if k > 1: - assert objects[k // 2] == hash(objects[k & -2] + objects[k | 1]) - return objects + return objects[1] == root ``` -## MerklePartial - -We define a container that encodes an SSZ partial, and provide the methods for converting it into a `{generalized_index: chunk}` map, for which we provide a method to extract individual values. To determine the hash tree root of an object represented by an SSZ partial, simply check `decode_ssz_partial(partial)[1]`. - -### `SSZMerklePartial` - -```python -class SSZMerklePartial(Container): - indices: List[uint64, 2**32] - chunks: List[Bytes32, 2**32] -``` - -### `decode_ssz_partial` - -```python -def decode_ssz_partial(encoded: SSZMerklePartial) -> Dict[int, Bytes32]: - """ - Decodes an encoded SSZ partial into a generalized index -> chunk map, and verify hash consistency. - """ - full_indices = get_expanded_indices(encoded.indices) - return fill({k:v for k,v in zip(full_indices, encoded.chunks)}) -``` - -### `extract_value_at_path` - -```python -def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Union[int, str]]) -> Any: - """ - Provides the value of the element in the object represented by the given encoded SSZ partial at - the given path. Returns a KeyError if that path is not covered by this SSZ partial. - """ - root = 1 - for p in path: - if p == '__len__': - return deserialize_basic(chunks[root * 2 + 1][:8], uint64) - if issubclass(typ, (List, Bytes)): - assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64) - pos, start, end = get_item_position(typ, p) - root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos - typ = get_elem_type(typ, p) - return deserialize_basic(chunks[root][start: end], typ) -``` - -Here [link TBD] is a python implementation of SSZ partials that represents them as a class that can be read and written to just like the underlying objects, so you can eg. perform state transitions on SSZ partials and compute the resulting root +Note that the single-item proof is a special case of a multi-item proof; a valid single-item proof verifies correctly when put into the multi-item verification function (making the natural trivial changes to input arguments, `index -> [index]` and `leaf -> [leaf]`). 
From 44bd00164ed272e92d8a8a47c267502e8d4ae6e8 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:11:45 -0400 Subject: [PATCH 080/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 8f9b14fb5..fcc8031a8 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -75,7 +75,8 @@ def get_elem_type(typ: ComplexType, index: int) -> Type: or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) """ return typ.get_fields()[key] if issubclass(typ, Container) else typ.elem_type - + + def chunk_count(typ: SSZType) -> int: """ Returns the number of hashes needed to represent the top-level elements in the given type From e93e7a3f1a29179bdfa8bd9f606a79927fa0c610 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:12:27 -0400 Subject: [PATCH 081/130] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index fcc8031a8..af0ff760d 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -69,7 +69,7 @@ def item_length(typ: SSZType) -> int: return 32 -def get_elem_type(typ: ComplexType, index: int) -> Type: +def get_elem_type(typ: ComplexType, index: Union[int, str]) -> Type: """ Returns the type of the element of an object of the given type with the given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) From 2e7c8fa529d1385cb10d990b6b150b75bafff7c3 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:12:35 -0400 Subject: [PATCH 082/130] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index af0ff760d..afa42f184 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -74,7 +74,7 @@ def get_elem_type(typ: ComplexType, index: Union[int, str]) -> Type: Returns the type of the element of an object of the given type with the given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) """ - return typ.get_fields()[key] if issubclass(typ, Container) else typ.elem_type + return typ.get_fields()[index] if issubclass(typ, Container) else typ.elem_type def chunk_count(typ: SSZType) -> int: From 5248bb6a15134ebc949e90e481c6e6ad7920a035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Fri, 2 Aug 2019 12:35:50 +0100 Subject: [PATCH 083/130] apply editorial suggestions. Co-Authored-By: Hsiao-Wei Wang Co-Authored-By: Preston Van Loon --- specs/networking/p2p-interface.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 66b1fa694..fa87635e0 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -79,13 +79,13 @@ The following SecIO parameters MUST be supported by all stacks: - Key agreement: ECDH-P256. - Cipher: AES-128. -- Digest: SHA256. +- Digest: SHA-256. #### Mainnet [Noise Framework](http://www.noiseprotocol.org/) handshakes will be used for mainnet. 
libp2p Noise support [is in the process of being standardised](https://github.com/libp2p/specs/issues/195) in the libp2p project. -Noise support will presumably include IX, IK and XX handshake patterns, and may rely on Curve25519 keys, ChaCha20 and Poly1305 ciphers, and SHA256 as a hash function. These aspects are being actively debated in the referenced issue [ETH 2.0 implementers are welcome to comment and contribute to the discussion.] +Noise support will presumably include IX, IK and XX handshake patterns, and may rely on Curve25519 keys, ChaCha20 and Poly1305 ciphers, and SHA-256 as a hash function. These aspects are being actively debated in the referenced issue [ETH 2.0 implementers are welcome to comment and contribute to the discussion.] ## Protocol Negotiation @@ -427,7 +427,7 @@ Response Content: ) ``` -Requests count beacon blocks from the peer starting from `start_slot` on the chain defined by `head_block_root`. The response MUST contain no more than count blocks. step defines the slot increment between blocks. For example, requesting blocks starting at `start_slot` 2 with a step value of 2 would return the blocks at [2, 4, 6, …]. In cases where a slot is empty for a given slot number, no block is returned. For example, if slot 4 were empty in the previous example, the returned array would contain [2, 6, …]. A step value of 1 returns all blocks on the range `[start_slot, start_slot + count)`. +Requests count beacon blocks from the peer starting from `start_slot` on the chain defined by `head_block_root`. The response MUST contain no more than count blocks. `step` defines the slot increment between blocks. For example, requesting blocks starting at `start_slot` 2 with a step value of 2 would return the blocks at [2, 4, 6, …]. In cases where a slot is empty for a given slot number, no block is returned. For example, if slot 4 were empty in the previous example, the returned array would contain [2, 6, …]. A step value of 1 returns all blocks on the range `[start_slot, start_slot + count)`. `BeaconBlocks` is primarily used to sync historical blocks. @@ -609,7 +609,7 @@ Topics names have a hierarchical structure. In the future, gossipsub may support No security or privacy guarantees are lost as a result of choosing plaintext topic names, since the domain is finite anyway, and calculating a digest's preimage would be trivial. -Furthermore, the ETH2 topic names are shorter their digest equivalents (asuming SHA-256 hash), so hashing topics would bloat messages unnecessarily. +Furthermore, the ETH2 topic names are shorter their digest equivalents (assuming SHA-256 hash), so hashing topics would bloat messages unnecessarily. ### Why are there `SHARD_SUBNET_COUNT` subnets, and why is this not defined? From f3c11852d79bc5488b016c0538e2dd0a24609237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Fri, 2 Aug 2019 12:38:59 +0100 Subject: [PATCH 084/130] apply more editorial suggestions. 
Co-Authored-By: Hsiao-Wei Wang
---
 specs/networking/p2p-interface.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md
index fa87635e0..2661ecdb0 100644
--- a/specs/networking/p2p-interface.md
+++ b/specs/networking/p2p-interface.md
@@ -229,7 +229,7 @@ Specifications of these parameters can be found in the [ENR Specification](http:

 In the interoperability testnet, all peers will support all capabilities defined in this document (gossip, full Req/Resp suite, discovery protocol), therefore the ENR record does not need to carry ETH2 capability information, as it would be superfluous.

-Nonetheless, ENRs MUST carry a generic `eth2` key with nil value, denoting that the peer is indeed a ETH2 peer, in order to eschew connecting to ETH1 peers.
+Nonetheless, ENRs MUST carry a generic `eth2` key with nil value, denoting that the peer is indeed an ETH2 peer, in order to eschew connecting to ETH1 peers.

 #### Mainnet

@@ -609,7 +609,7 @@ Topics names have a hierarchical structure. In the future, gossipsub may support

 No security or privacy guarantees are lost as a result of choosing plaintext topic names, since the domain is finite anyway, and calculating a digest's preimage would be trivial.

-Furthermore, the ETH2 topic names are shorter their digest equivalents (assuming SHA-256 hash), so hashing topics would bloat messages unnecessarily.
+Furthermore, the ETH2 topic names are shorter than their digest equivalents (assuming SHA-256 hash), so hashing topics would bloat messages unnecessarily.

 ### Why are there `SHARD_SUBNET_COUNT` subnets, and why is this not defined?

@@ -637,7 +637,7 @@ Requests are segregated by protocol ID to:

 1. Leverage protocol routing in libp2p, such that the libp2p stack will route the incoming stream to the appropriate handler. This allows the handler function for each request type to be self-contained. For an analogy, think about how you attach HTTP handlers to a REST API server.
 2. Version requests independently. In a coarser-grained umbrella protocol, the entire protocol would have to be versioned even if just one field in a single message changed.
-3. Enable clients to select the individual requests/versions they support. It would no longer be a strict requirement to support all requests, and clients, in principle, could support a subset of equests and variety of versions.
+3. Enable clients to select the individual requests/versions they support. It would no longer be a strict requirement to support all requests, and clients, in principle, could support a subset of requests and variety of versions.
 4. Enable flexibility and agility for clients adopting spec changes that impact the request, by signalling to peers exactly which subset of new/old requests they support.
 5. Enable clients to explicitly choose backwards compatibility at the request granularity. Without this, clients would be forced to support entire versions of the coarser request protocol.
 6. Parallelise RFCs (or ETH2 EIPs). By decoupling requests from one another, each RFC that affects the request protocol can be deployed/tested/debated independently without relying on a synchronisation point to version the general top-level protocol.
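For concreteness, a minimal sketch of how a client might construct and match these segregated protocol IDs, assuming the `/eth2/beacon_chain/req` prefix and the exact-equality/higher-version-priority negotiation rule described in this document; the message names and the `SUPPORTED` set are hypothetical placeholders, not spec definitions:

```python
# Illustrative sketch only. PROTOCOL_PREFIX comes from this document; the
# message names and the SUPPORTED set below are hypothetical placeholders.
PROTOCOL_PREFIX = "/eth2/beacon_chain/req"


def protocol_id(message_name: str, schema_version: int, encoding: str) -> str:
    # /ProtocolPrefix/MessageName/SchemaVersion/Encoding
    return f"{PROTOCOL_PREFIX}/{message_name}/{schema_version}/{encoding}"


SUPPORTED = {
    protocol_id("beacon_blocks", 1, "ssz"),
    protocol_id("beacon_blocks", 2, "ssz"),
}


def select_protocol(offered: list) -> str:
    # Exact string equality when matching, giving priority to the higher
    # schema version, per the negotiation rules in this document.
    matches = [p for p in offered if p in SUPPORTED]
    if not matches:
        raise ValueError("no mutually supported protocol")
    return max(matches, key=lambda p: int(p.split("/")[-2]))
```

Under this scheme, offering `beacon_blocks` version 2 alongside version 1 requires no change to any other request type, which is precisely the decoupling argued for in the numbered list above.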
From c29d85aafb99639930e49129e0c173082de450f5 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:40:26 -0400 Subject: [PATCH 085/130] Update specs/core/1_shard-data-chains.md Co-Authored-By: Hsiao-Wei Wang --- specs/core/1_shard-data-chains.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index c4d8e2701..f284574f2 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -432,7 +432,7 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: # Save states in history accumulator depth = 0 h = hash_tree_root(state) - while state.slot % 2**depth == 0: + while state.slot % 2**depth == 0 and depth <= HISTORY_ACCUMULATOR_VECTOR: state.history_accumulator[depth] = h depth += 1 From ddd43ad99d1e75affe77627d9f0c06f7a2825a35 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:40:49 -0400 Subject: [PATCH 086/130] <= becomes < --- specs/core/1_shard-data-chains.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index f284574f2..317011716 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -432,7 +432,7 @@ def shard_slot_transition(state: ShardState, beacon_state: BeaconState) -> None: # Save states in history accumulator depth = 0 h = hash_tree_root(state) - while state.slot % 2**depth == 0 and depth <= HISTORY_ACCUMULATOR_VECTOR: + while state.slot % 2**depth == 0 and depth < HISTORY_ACCUMULATOR_VECTOR: state.history_accumulator[depth] = h depth += 1 From 0a874528a8e9ce31703554133393405b6c4ed438 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:43:03 -0400 Subject: [PATCH 087/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index afa42f184..9afa96738 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -201,7 +201,7 @@ x x . . . . x * First, we provide a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: -``` +```python def get_branch_indices(tree_index: GeneralizedIndex) -> List[GeneralizedIndex]: """ Get the generalized indices of the sister chunks along the path from the chunk with the From 845daf5b1f22d6b4e91c2713c8887f41d78750de Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:43:24 -0400 Subject: [PATCH 088/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 9afa96738..2a1103ca2 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -100,7 +100,7 @@ def chunk_count(typ: SSZType) -> int: def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, int]: """ Returns three variables: (i) the index of the chunk in which the given element of the item is - represented, (ii) the starting byte position, (iii) the ending byte position. For example for + represented, (ii) the starting byte position within the chunk, (iii) the ending byte position within the chunk. 
For example for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) """ if issubclass(typ, Elements): From 59307d1380de871ac1d031c48c781aae0c0c60b1 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:43:42 -0400 Subject: [PATCH 089/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 2a1103ca2..bf6cd7a61 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -85,6 +85,7 @@ def chunk_count(typ: SSZType) -> int: hash. For lists/vectors of basic types, it is often fewer because multiple basic elements can be packed into one 32-byte chunk. """ + # typ.length describes the limit for list types, or the length for vector types. if issubclass(typ, BasicValue): return 1 elif issubclass(typ, Bits): From c6cdec8217b41f2cfa20f4f117fbaac47a4c89ab Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:45:26 -0400 Subject: [PATCH 090/130] Fixed get generalized indices --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index bf6cd7a61..038990709 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -142,7 +142,7 @@ def concat_generalized_indices(*indices: Sequence[GeneralizedIndex]) -> Generali """ o = GeneralizedIndex(1) for i in indices: - o = o * get_previous_power_of_2(i) + i + o = o * get_previous_power_of_2(i) + (i - get_previous_power_of_2(i)) return o ``` From c8d128aa59c5dc3c9ae419ce30b20a028444af8f Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:46:59 -0400 Subject: [PATCH 091/130] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 038990709..2e00806ee 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -259,7 +259,7 @@ def verify_merkle_multiproof(leaves: Sequence[Hash], proof: Sequence[Hash], indi while pos < len(keys): k = keys[pos] if k in objects and k ^ 1 in objects and k // 2 not in objects: - objects[k // 2] = hash(objects[k & -2] + objects[k | 1]) + objects[k // 2] = hash(objects[(k | 1) ^ 1] + objects[k | 1]) keys.append(k // 2) pos += 1 return objects[1] == root From 6138edc5bebd99c87759161e277c4d57823ad9a0 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:49:35 -0400 Subject: [PATCH 092/130] log -> log2 --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 2e00806ee..469f347ac 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -153,7 +153,7 @@ def get_generalized_index_length(index: GeneralizedIndex) -> int: """ Returns the length of a path represented by a generalized index. 
""" - return log(index) + return log2(index) ``` #### `get_generalized_index_bit` From 5237ac4954382e3b9f5a17dd9bf9fd6df0637876 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:57:32 -0400 Subject: [PATCH 093/130] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 469f347ac..e0be4f070 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -132,6 +132,8 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized ### Helpers for generalized indices +_Usage note: functions outside this section should manipulate generalized indices using only functions inside this section. This is to make it easier for developers to implement generalized indices with underlying representations other than bigints._ + #### `concat_generalized_indices` ```python From 514ff5814f233836eba8b45d9bb5d94b97d7a60e Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Fri, 2 Aug 2019 21:12:40 +0200 Subject: [PATCH 094/130] Updates * constants -> configurations * constant name updates * initial validation requirement for attestations * allow aggregated attestations to be published * move discv5 down a bit * additional rationale --- specs/networking/p2p-interface.md | 143 +++++++++++++++++------------- 1 file changed, 80 insertions(+), 63 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 2661ecdb0..ed2047190 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -22,10 +22,10 @@ It consists of four main sections: - [Protocol Negotiation](#protocol-negotiation) - [Multiplexing](#multiplexing) - [ETH2 network interaction domains](#eth2-network-interaction-domains) - - [Constants](#constants) + - [Configuration](#configuration) - [The gossip domain: gossipsub](#the-gossip-domain-gossipsub) - - [The discovery domain: discv5](#the-discovery-domain-discv5) - [The Req/Resp domain](#the-reqresp-domain) + - [The discovery domain: discv5](#the-discovery-domain-discv5) - [Design Decision Rationale](#design-decision-rationale) - [Transport](#transport-1) - [Multiplexing](#multiplexing-1) @@ -89,6 +89,8 @@ Noise support will presumably include IX, IK and XX handshake patterns, and may ## Protocol Negotiation +Clients MUST use exact equality when negotiating protocol versions to use and MAY use the version to give priority to higher version numbers. + #### Interop Connection-level and stream-level (see the rationale section below for explanations) protocol negotiation MUST be conducted using [multistream-select v1.0](https://github.com/multiformats/multistream-select/). Its protocol ID is: `/multistream/1.0.0`. @@ -107,16 +109,15 @@ Clients MUST support [mplex](https://github.com/libp2p/specs/tree/master/mplex) # ETH2 network interaction domains -## Constants +## Configuration This section outlines constants that are used in this spec. -- `RQRP_MAX_SIZE`: The max size of uncompressed req/resp messages that clients will allow. - Value: TBD -- `GOSSIP_MAX_SIZE`: The max size of uncompressed gossip messages - Value: 1MB (estimated from expected largest uncompressed block size). -- `SHARD_SUBNET_COUNT`: The number of shard subnets used in the gossipsub protocol. - Value: TBD +| `REQ_RESP_MAX_SIZE` | `TODO` | The max size of uncompressed req/resp messages that clients will allow. 
| +| `GOSSIP_MAX_SIZE` | `2**20` (= 1048576, 1 MiB) | The max size of uncompressed gossip messages | +| `SHARD_SUBNET_COUNT` | `TODO` | The number of shard subnets used in the gossipsub protocol. | +| `TTFB_TIMEOUT` | `5s` | Maximum time to wait for first byte of request response (time-to-first-byte) | +| `RESP_TIMEOUT` | `10s` | Maximum time for complete response transfer | ## The gossip domain: gossipsub @@ -128,7 +129,7 @@ Clients MUST support the [gossipsub](https://github.com/libp2p/specs/tree/master *Note: Parameters listed here are subject to a large-scale network feasibility study.* -The following gossipsub parameters will be used: +The following gossipsub [parameters](https://github.com/libp2p/specs/tree/master/pubsub/gossipsub#meshsub-an-overlay-mesh-router) will be used: - `D` (topic stable mesh target count): 6 - `D_low` (topic stable mesh low watermark): 4 @@ -147,8 +148,8 @@ Topic strings have form: `/eth2/TopicName/TopicEncoding`. This defines both the There are two main topics used to propagate attestations and beacon blocks to all nodes on the network. Their `TopicName`'s are: -- `beacon_block` - This topic is used solely for propagating new beacon blocks to all nodes on the networks. Blocks are sent in their entirety. Clients who receive a block on this topic MUST validate the block proposer signature before forwarding it across the network. -- `beacon_attestation` - This topic is used to propagate aggregated attestations (in their entirety) to subscribing nodes (typically block proposers) to be included in future blocks. Similarly to beacon blocks, clients will be expected to perform some sort of validation before forwarding, but the precise mechanism is still TBD. +- `beacon_block` - This topic is used solely for propagating new beacon blocks to all nodes on the networks. Blocks are sent in their entirety. Clients MUST validate the block proposer signature before forwarding it across the network. +- `beacon_attestation` - This topic is used to propagate aggregated attestations (in their entirety) to subscribing nodes (typically block proposers) to be included in future blocks. Clients MUST validate that the block being voted for passes validation before forwarding the attestation on the network (TODO: [additional validations](https://github.com/ethereum/eth2.0-specs/issues/1332)). Additional topics are used to propagate lower frequency validator messages. Their `TopicName`’s are: @@ -158,12 +159,14 @@ Additional topics are used to propagate lower frequency validator messages. Thei #### Interop -Unaggregated attestations from all shards are sent to the `beacon_attestation` topic. +Unaggregated and aggregated attestations from all shards are sent to the `beacon_attestation` topic. Clients are not required to publish aggregate attestations but must be able to process them. #### Mainnet Shards are grouped into their own subnets (defined by a shard topic). The number of shard subnets is defined via `SHARD_SUBNET_COUNT` and the shard `shard_number % SHARD_SUBNET_COUNT` is assigned to the topic: `shard{shard_number % SHARD_SUBNET_COUNT}_beacon_attestation`. Unaggregated attestations are sent to the subnet topic. Aggregated attestations are sent to the `beacon_attestation` topic. +TODO: [aggregation strategy](https://github.com/ethereum/eth2.0-specs/issues/1331) + ### Messages Each gossipsub [message](https://github.com/libp2p/go-libp2p-pubsub/blob/master/pb/rpc.proto#L17-L24) has a maximum size of `GOSSIP_MAX_SIZE`. @@ -200,51 +203,6 @@ Topics are post-fixed with an encoding. 
Encodings define how the payload of a go Implementations MUST use a single encoding. Changing an encoding will require coordination between participating implementations. -## The discovery domain: discv5 - -Discovery Version 5 ([discv5](https://github.com/ethereum/devp2p/blob/master/discv5/discv5.md)) is used for peer discovery, both in the interoperability testnet and mainnet. - -`discv5` is a standalone protocol, running on UDP on a dedicated port, meant for peer discovery only. `discv5` supports self-certified, flexible peer records (ENRs) and topic-based advertisement, both of which are (or will be) requirements in this context. - -### Integration into libp2p stacks - -`discv5` SHOULD be integrated into the client’s libp2p stack by implementing an adaptor to make it conform to the [service discovery](https://github.com/libp2p/go-libp2p-core/blob/master/discovery/discovery.go) and [peer routing](https://github.com/libp2p/go-libp2p-core/blob/master/routing/routing.go#L36-L44) abstractions and interfaces (go-libp2p links provided). - -Inputs to operations include peer IDs (when locating a specific peer), or capabilities (when searching for peers with a specific capability), and the outputs will be multiaddrs converted from the ENR records returned by the discv5 backend. - -This integration enables the libp2p stack to subsequently form connections and streams with discovered peers. - -### ENR structure - -The Ethereum Node Record (ENR) for an Ethereum 2.0 client MUST contain the following entries (exclusive of the sequence number and signature, which MUST be present in an ENR): - -- The compressed secp256k1 publickey, 33 bytes (`secp256k1` field). -- An IPv4 address (`ip` field) and/or IPv6 address (`ip6` field). -- A TCP port (`tcp` field) representing the local libp2p listening port. -- A UDP port (`udp` field) representing the local discv5 listening port. - -Specifications of these parameters can be found in the [ENR Specification](http://eips.ethereum.org/EIPS/eip-778). - -#### Interop - -In the interoperability testnet, all peers will support all capabilities defined in this document (gossip, full Req/Resp suite, discovery protocol), therefore the ENR record does not need to carry ETH2 capability information, as it would be superfluous. - -Nonetheless, ENRs MUST carry a generic `eth2` key with nil value, denoting that the peer is indeed an ETH2 peer, in order to eschew connecting to ETH1 peers. - -#### Mainnet - -On mainnet, ENRs MUST include a structure enumerating the capabilities offered by the peer in an efficient manner. The concrete solution is currently undefined. Proposals include using namespaced bloom filters mapping capabilities to specific protocol IDs supported under that capability. - -### Topic advertisement - -#### Interop - -This feature will not be used in the interoperability testnet. - -#### Mainnet - -In mainnet, we plan to use discv5’s topic advertisement feature as a rendezvous facility for peers on shards (thus subscribing to the relevant gossipsub topics). - ## The Req/Resp domain ### Protocol identification @@ -288,7 +246,7 @@ Once a new stream with the protocol ID for the request type has been negotiated, The requester MUST close the write side of the stream once it finishes writing the request message - at this point, the stream will be half-closed. -The requester MUST wait a maximum of **5 seconds** for the first response byte to arrive (time to first byte – or TTFB – timeout). 
On that happening, the requester will allow further **10 seconds** to receive the full response. +The requester MUST wait a maximum of `TTFB_TIMEOUT` for the first response byte to arrive (time to first byte – or TTFB – timeout). On that happening, the requester will allow further `RESP_TIMEOUT` to receive the full response. If any of these timeouts fire, the requester SHOULD reset the stream and deem the req/resp operation to have failed. @@ -306,11 +264,11 @@ The responder MUST: If steps (1), (2) or (3) fail due to invalid, malformed or inconsistent data, the responder MUST respond in error. Clients tracking peer reputation MAY record such failures, as well as unexpected events, e.g. early stream resets. -The entire request should be read in no more than **5 seconds**. Upon a timeout, the responder SHOULD reset the stream. +The entire request should be read in no more than `RESP_TIMEOUT`. Upon a timeout, the responder SHOULD reset the stream. The responder SHOULD send a response promptly, starting with a **single-byte** response code which determines the contents of the response (`result` particle in the BNF grammar above). -It can have one of the following values: +It can have one of the following values, encoded as a single unsigned byte: - 0: **Success** -- a normal response follows, with contents matching the expected message schema and encoding specified in the request. - 1: **InvalidRequest** -- the contents of the request are semantically invalid, or the payload is malformed, or could not be understood. The response payload adheres to the `ErrorMessage` schema (described below). @@ -461,6 +419,53 @@ Requests blocks by their block roots. The response is a list of `BeaconBlock` wi Clients MUST support requesting blocks since the latest finalized epoch. +## The discovery domain: discv5 + +Discovery Version 5 ([discv5](https://github.com/ethereum/devp2p/blob/master/discv5/discv5.md)) is used for peer discovery, both in the interoperability testnet and mainnet. + +`discv5` is a standalone protocol, running on UDP on a dedicated port, meant for peer discovery only. `discv5` supports self-certified, flexible peer records (ENRs) and topic-based advertisement, both of which are (or will be) requirements in this context. + +:warning: Under construction. :warning: + +### Integration into libp2p stacks + +`discv5` SHOULD be integrated into the client’s libp2p stack by implementing an adaptor to make it conform to the [service discovery](https://github.com/libp2p/go-libp2p-core/blob/master/discovery/discovery.go) and [peer routing](https://github.com/libp2p/go-libp2p-core/blob/master/routing/routing.go#L36-L44) abstractions and interfaces (go-libp2p links provided). + +Inputs to operations include peer IDs (when locating a specific peer), or capabilities (when searching for peers with a specific capability), and the outputs will be multiaddrs converted from the ENR records returned by the discv5 backend. + +This integration enables the libp2p stack to subsequently form connections and streams with discovered peers. + +### ENR structure + +The Ethereum Node Record (ENR) for an Ethereum 2.0 client MUST contain the following entries (exclusive of the sequence number and signature, which MUST be present in an ENR): + +- The compressed secp256k1 publickey, 33 bytes (`secp256k1` field). +- An IPv4 address (`ip` field) and/or IPv6 address (`ip6` field). +- A TCP port (`tcp` field) representing the local libp2p listening port. +- A UDP port (`udp` field) representing the local discv5 listening port. 
+ +Specifications of these parameters can be found in the [ENR Specification](http://eips.ethereum.org/EIPS/eip-778). + +#### Interop + +In the interoperability testnet, all peers will support all capabilities defined in this document (gossip, full Req/Resp suite, discovery protocol), therefore the ENR record does not need to carry ETH2 capability information, as it would be superfluous. + +Nonetheless, ENRs MUST carry a generic `eth2` key with nil value, denoting that the peer is indeed an ETH2 peer, in order to eschew connecting to ETH1 peers. + +#### Mainnet + +On mainnet, ENRs MUST include a structure enumerating the capabilities offered by the peer in an efficient manner. The concrete solution is currently undefined. Proposals include using namespaced bloom filters mapping capabilities to specific protocol IDs supported under that capability. + +### Topic advertisement + +#### Interop + +This feature will not be used in the interoperability testnet. + +#### Mainnet + +In mainnet, we plan to use discv5’s topic advertisement feature as a rendezvous facility for peers on shards (thus subscribing to the relevant gossipsub topics). + # Design Decision Rationale ## Transport @@ -601,7 +606,19 @@ For future extensibility with almost zero overhead now (besides the extra bytes ### How do we upgrade gossip channels (e.g. changes in encoding, compression)? -Such upgrades lead to fragmentation, so they’ll need to be carried out in a coordinated manner most likely during a hard fork. +Changing gossipsub / broadcasts requires a coordinated upgrade where all clients start publishing to the new topic together, for example during a hard fork. + +One can envision a two-phase deployment as well where clients start listening to the new topic in a first phase then start publishing some time later, letting the traffic naturally move over to the new topic. + +### Why must all clients use the same gossip topic instead of one negotiated between each peer pair? + +Supporting multiple topics / encodings would require the presence of relayers to translate between encodings and topics so as to avoid network fragmentation where participants have diverging views on the gossiped state, making the protocol more complicated and fragile. + +Gossip protocols typically remember what messages they've seen for a finite period of time based on message identity - if you publish the same message again after that time has passed, it will be re-broadcast - adding a relay delay also makes this scenario more likely. + +One can imagine that in a complicated upgrade scenario, we might have peers publishing the same message on two topics/encodings, but the price here is pretty high in terms of overhead - both computational and networking, so we'd rather avoid that. + +It is permitted for clients to publish data on alternative topics as long as they also publish on the network-wide mandatory topic. ### Why are the topics strings and not hashes? @@ -625,7 +642,7 @@ The prohibition of unverified-block-gossiping extends to nodes that cannot verif ### How are we going to discover peers in a gossipsub topic? -Via discv5 topics. ENRs should not be used for this purpose, as they store identity, location and capability info, not volatile advertisements. +Via discv5 topics. ENRs should not be used for this purpose, as they store identity, location and capability info, not volatile [advertisements](#topic-advertisement). 
In the interoperability testnet, all peers will be subscribed to all global beacon chain topics, so discovering peers in specific shard topics will be unnecessary. From 63e2915e1248bae426d5a7d7da912bd9dd628f8e Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 2 Aug 2019 21:43:36 +0200 Subject: [PATCH 095/130] update SSZ static doc to reflect options in test conditions --- specs/test_formats/ssz_static/core.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/specs/test_formats/ssz_static/core.md b/specs/test_formats/ssz_static/core.md index a7301a3c8..d0cfd25f6 100644 --- a/specs/test_formats/ssz_static/core.md +++ b/specs/test_formats/ssz_static/core.md @@ -40,10 +40,14 @@ The same value as `serialized.ssz`, represented as YAML. ## Condition A test-runner can implement the following assertions: -- Serialization: After parsing the `value`, SSZ-serialize it: the output should match `serialized` +- If YAML decoding of SSZ objects is supported by the implementation: + - Serialization: After parsing the `value`, SSZ-serialize it: the output should match `serialized` + - Deserialization: SSZ-deserialize the `serialized` value, and see if it matches the parsed `value` +- If YAML decoding of SSZ objects is not supported by the implementation: + - Serialization in 2 steps: deserialize `serialized`, then serialize the result, + and verify if the bytes match the original `serialized`. - Hash-tree-root: After parsing the `value` (or deserializing `serialized`), Hash-tree-root it: the output should match `root` - Optionally also check `signing_root`, if present. -- Deserialization: SSZ-deserialize the `serialized` value, and see if it matches the parsed `value` ## References From cf1d49a1dec7f7ed8d612b039b2dccc566883536 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Sat, 3 Aug 2019 09:27:49 +0200 Subject: [PATCH 096/130] cleanups --- specs/networking/p2p-interface.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index ed2047190..eaa767216 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -51,9 +51,7 @@ Even though libp2p is a multi-transport stack (designed to listen on multiple si #### Interop -All implementations MUST support the TCP libp2p transport, and it MUST be enabled for both dialing and listening (i.e. outbound and inbound connections). - -The libp2p TCP transport supports listening on IPv4 and IPv6 addresses (and on multiple simultaneously). Clients SHOULD allow the operator to configure the listen IP addresses and ports, including the addressing schemes (IPv4, IPv6). +All implementations MUST support the TCP libp2p transport, and it MUST be enabled for both dialing and listening (i.e. outbound and inbound connections). The libp2p TCP transport supports listening on IPv4 and IPv6 addresses (and on multiple simultaneously). To facilitate connectivity, and avert possible IPv6 routability/support issues, clients participating in the interoperability testnet MUST expose at least ONE IPv4 endpoint. @@ -236,9 +234,9 @@ result ::= “0” | “1” | “2” | [“128” ... ”255”] The encoding-dependent header may carry metadata or assertions such as the encoded payload length, for integrity and attack proofing purposes. It is not strictly necessary to length-prefix payloads, because req/resp streams are single-use, and stream closures implicitly delimit the boundaries, but certain encodings like SSZ do, for added security. 
-`encoded-payload` has a maximum byte size of `RQRP_MAX_SIZE`. +`encoded-payload` has a maximum byte size of `REQ_RESP_MAX_SIZE`. -Clients MUST ensure the payload size is less than or equal to `RQRP_MAX_SIZE`, if not, they SHOULD reset the stream immediately. Clients tracking peer reputation MAY decrement the score of the misbehaving peer under this circumstance. +Clients MUST ensure the payload size is less than or equal to `REQ_RESP_MAX_SIZE`, if not, they SHOULD reset the stream immediately. Clients tracking peer reputation MAY decrement the score of the misbehaving peer under this circumstance. #### Requesting side @@ -286,7 +284,7 @@ The `ErrorMessage` schema is: ) ``` -*Note that the String type is encoded as UTF-8 bytes when SSZ-encoded.* +*Note that the String type is encoded as UTF-8 bytes without NULL terminator when SSZ-encoded.* A response therefore has the form: ``` From 32a43acfc1372b331fe9a7b9032eec366cef4cd7 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Thu, 25 Jul 2019 10:18:10 +1000 Subject: [PATCH 097/130] Clarify length bit for Bitlist merklization --- specs/simple-serialize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index f479c5d00..119022248 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -189,7 +189,7 @@ We first define helper functions: * `List[B, N]` and `Vector[B, N]`, where `B` is a basic type: `(N * size_of(B) + 31) // 32` (dividing by chunk size, rounding up) * `List[C, N]` and `Vector[C, N]`, where `C` is a composite type: `N` * containers: `len(fields)` -* `bitfield_bytes(bits)`: return the bits of the bitlist or bitvector, packed in bytes, aligned to the start. Exclusive length-delimiting bit for bitlists. +* `bitfield_bytes(bits)`: return the bits of the bitlist or bitvector, packed in bytes, aligned to the start. Length-delimiting bit for bitlists is excluded. * `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks. * `next_pow_of_two(i)`: get the next power of 2 of `i`, if not already a power of 2, with 0 mapping to 1. Examples: `0->1, 1->1, 2->2, 3->4, 4->4, 6->8, 9->16` * `merkleize(chunks, limit=None)`: Given ordered `BYTES_PER_CHUNK`-byte chunks, merkleize the chunks, and return the root: From d09d56bec8cec98e3d77a286c48972521dd2be96 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Sun, 4 Aug 2019 20:56:41 +0200 Subject: [PATCH 098/130] discuss length-prefixing pro/con, consider for removal, add link --- specs/networking/p2p-interface.md | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index eaa767216..6f79b5d49 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -664,11 +664,23 @@ CAVEAT: the protocol negotiation component in the current version of libp2p is c ### Why are messages length-prefixed with a protobuf varint in the SSZ encoding? -In stream-oriented protocols, we need to delimit messages from one another, so that the reader knows where one message ends and the next one starts. Length-prefixing is an effective solution. Alternatively, one could set a delimiter char/string, but this can readily cause ambiguity if the message itself may contain the delimiter. 
It also introduces another set of edge cases to model for, thus causing unnecessary complexity, especially if messages are to be compressed (and thus mutated beyond our control).

+We are using single-use streams where each stream is closed at the end of the message - thus libp2p transparently handles message delimiting in the underlying stream. libp2p streams are full-duplex, and each party is responsible for closing their write side (like in TCP). We can therefore use stream closure to mark the end of the request and response independently.

-That said, in our case, streams are single-use. libp2p streams are full-duplex, and each party is responsible for closing their write side (like in TCP). We therefore use stream closure to mark the end of a request.
+Nevertheless, messages are still length-prefixed - this is now being considered for removal.

-Nevertheless, messages are still length-prefixed to prevent DOS attacks where malicious actors send large amounts of data disguised as a request. A length prefix allows clients to set a maximum limit, and once that limit is read, the client can cease reading and disconnect the stream. This allows a client to determine the exact length of the packet being sent, and it capacitates it to reset the stream early if the other party expresses they intend to send too much data.
+Advantages of length-prefixing include:
+
+* Reader can prepare a correctly sized buffer before reading message
+* Alignment with protocols like gRPC over HTTP/2 that prefix with length
+* Sanity checking of stream closure / message length
+
+Disadvantages include:
+
+* Redundant methods of message delimiting - both stream end marker and length prefix
+* Harder to stream as length must be known up-front
+* Additional code path required to verify length
+
+In some protocols, adding a length prefix serves as a form of DoS protection against very long messages, allowing the client to abort if an overlong message is about to be sent. In this protocol, we are globally limiting message sizes using `REQ_RESP_MAX_SIZE`, thus the length prefix does not afford any additional protection.

[Protobuf varint](https://developers.google.com/protocol-buffers/docs/encoding#varints) is an efficient technique to encode variable-length ints. Instead of reserving a fixed-size field of as many bytes as necessary to convey the maximum possible value, this field is elastic in exchange for 1-bit overhead per byte.

@@ -728,7 +740,7 @@ SSZ has well defined schema’s for consensus objects (typically sent across the

 We compress on the wire to achieve smaller payloads per-message, which, in aggregate, result in higher efficiency, better utilisation of available bandwidth, and overall reduction in network-wide traffic overhead.

-At this time, libp2p does not have an out-of-the-box compression feature that can be dynamically negotiated and layered atop connections and streams, but this will be raised in the libp2p community for consideration.
+At this time, libp2p does not have an out-of-the-box compression feature that can be dynamically negotiated and layered atop connections and streams, but it is [being considered](https://github.com/libp2p/libp2p/issues/81).

This is a non-trivial feature because the behaviour of network IO loops, kernel buffers, chunking, packet fragmentation, amongst others, need to be taken into account. libp2p streams are unbounded streams, whereas compression algorithms work best on bounded byte streams of which we have some prior knowledge.
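For concreteness, a minimal, self-contained sketch of the protobuf-style unsigned varint used for length-prefixing above, assuming the semantics from the linked protobuf documentation; this is illustrative only, not normative spec code:

```python
from typing import Tuple


def encode_varint(n: int) -> bytes:
    """Encode a non-negative integer as a protobuf-style unsigned varint."""
    assert n >= 0
    out = bytearray()
    while True:
        byte = n & 0x7F
        n >>= 7
        if n:
            out.append(byte | 0x80)  # more bytes follow: set the continuation bit
        else:
            out.append(byte)  # final byte: continuation bit clear
            return bytes(out)


def decode_varint(data: bytes) -> Tuple[int, int]:
    """Decode a varint from the front of `data`, returning (value, bytes_read)."""
    value, shift = 0, 0
    for i, byte in enumerate(data):
        value |= (byte & 0x7F) << shift
        if not byte & 0x80:
            return value, i + 1
        shift += 7
    raise ValueError("truncated varint")


# 300 (0b10_0101100) encodes to 0xAC 0x02, matching the protobuf docs' example.
assert encode_varint(300) == bytes([0xAC, 0x02])
assert decode_varint(bytes([0xAC, 0x02])) == (300, 2)
```

A reader enforcing `REQ_RESP_MAX_SIZE` would check the decoded length against that limit before allocating a buffer or reading the rest of the payload.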
From 499e187382798060b3c880782b5563e4253c0bcf Mon Sep 17 00:00:00 2001
From: Ben Edgington
Date: Mon, 5 Aug 2019 12:19:32 +0100
Subject: [PATCH 099/130] Fix constants table

Tables need header rows in Markdown.
---
 specs/networking/p2p-interface.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md
index 6f79b5d49..6fca087da 100644
--- a/specs/networking/p2p-interface.md
+++ b/specs/networking/p2p-interface.md
@@ -111,6 +111,8 @@ Clients MUST support [mplex](https://github.com/libp2p/specs/tree/master/mplex)

 This section outlines constants that are used in this spec.

+| Name | Value | Description |
+|---|---|---|
 | `REQ_RESP_MAX_SIZE` | `TODO` | The max size of uncompressed req/resp messages that clients will allow. |
 | `GOSSIP_MAX_SIZE` | `2**20` (= 1048576, 1 MiB) | The max size of uncompressed gossip messages |
 | `SHARD_SUBNET_COUNT` | `TODO` | The number of shard subnets used in the gossipsub protocol. |

From cb92aa91ddaedb9dc6cff67718803577bf82ee03 Mon Sep 17 00:00:00 2001
From: vbuterin
Date: Mon, 5 Aug 2019 14:37:38 -0400
Subject: [PATCH 100/130] Include state root blocks in crosslink data in non-block slots

Also adds `total_bytes` to state. The goal is to facilitate easier fraud proofs, so that one needs to simply check two adjacent headers in a crosslink and their respective bodies to verify a fraud proof.
---
 specs/core/1_shard-data-chains.md | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md
index 317011716..b82ae5732 100644
--- a/specs/core/1_shard-data-chains.md
+++ b/specs/core/1_shard-data-chains.md
@@ -181,6 +181,7 @@ class ShardState(Container):
     shard: Shard
     most_recent_block_core: ShardBlockCore
     receipt_root: Hash
+    total_bytes: uint64
 ```

 ### `ShardReceiptDelta`
@@ -531,7 +532,9 @@ def shard_block_transition(state: ShardState,
     ))

     # Check total bytes
-    assert block.core.total_bytes == state.most_recent_block_core.total_bytes + len(block.core.data)
+    state.total_bytes += len(block.core.data)
+    assert block.core.total_bytes == state.total_bytes
+
     # Update in-state block header
     state.most_recent_block_core = ShardBlockCore(
         slot=block.core.slot,
@@ -554,14 +557,16 @@ Let:

 - `shard` be a valid `Shard`
-- `shard_blocks` be the `ShardBlock` list such that `shard_blocks[slot]` is the canonical `ShardBlock` for shard `shard` at slot `slot`
+- `pre_state` be the `ShardState` before processing any blocks
+- `shard_blocks_or_state_roots` be the `Union[ShardBlock, Hash]` list such that `shard_blocks_or_state_roots[slot]` is the canonical `ShardBlock` for shard `shard` at slot `slot` if a block exists, or the post-state-root of processing state up to and including that slot if a block does not exist.
- `beacon_state` be the canonical `BeaconState` - `valid_attestations` be the set of valid `Attestation` objects, recursively defined - `candidate` be a candidate `Attestation` which is valid under Phase 0 rules, and for which validity is to be determined under Phase 1 rules by running `is_valid_beacon_attestation` ```python def is_valid_beacon_attestation(shard: Shard, - shard_blocks: Sequence[ShardBlock], + pre_state: ShardState, + shard_blocks_or_state_roots: Sequence[Union[ShardBlock, Hash]], beacon_state: BeaconState, valid_attestations: Set[Attestation], candidate: Attestation) -> bool: @@ -588,7 +593,14 @@ def is_valid_beacon_attestation(shard: Shard, start_epoch + MAX_EPOCHS_PER_CROSSLINK) blocks = [] for slot in range(start_epoch * SLOTS_PER_EPOCH, end_epoch * SLOTS_PER_EPOCH): - blocks.append(shard_blocks[slot]) + if isinstance(shard_blocks_or_state_roots[slot], ShardBlock): + blocks.append(shard_blocks_or_state_roots[slot]) + else: + blocks.append(ShardBlockHeader(ShardBlockCore( + slot=slot, + state_root=shard_blocks_or_state_roots[slot], + total_bytes=state.total_bytes + ), ShardBlockSignatures())) assert candidate.data.crosslink.data_root == compute_crosslink_data_root(blocks) return True From 095cfe6633f9dbe62e8d4745665bcf4361da3a4c Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Wed, 7 Aug 2019 19:29:24 +0800 Subject: [PATCH 101/130] Fix build_spec and typo --- scripts/build_spec.py | 2 +- specs/core/1_shard-data-chains.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 96866cc8a..52642c8f4 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -37,7 +37,7 @@ from eth2spec.utils.bls import ( from eth2spec.utils.hash_function import hash ''' PHASE1_IMPORTS = '''from typing import ( - Any, Dict, Optional, Set, Sequence, MutableSequence, Tuple, + Any, Dict, Optional, Set, Sequence, MutableSequence, Tuple, Union, ) from dataclasses import ( diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index b82ae5732..283c1a9ca 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -535,7 +535,6 @@ def shard_block_transition(state: ShardState, state.total_bytes += len(block.core.data) assert block.core.total_bytes == state.total_bytes - # Update in-state block header state.most_recent_block_core = ShardBlockCore( slot=block.core.slot, @@ -599,7 +598,7 @@ def is_valid_beacon_attestation(shard: Shard, blocks.append(ShardBlockHeader(ShardBlockCore( slot=slot, state_root=shard_blocks_or_state_roots[slot], - total_bytes=state.total_bytes + total_bytes=pre_state.total_bytes ), ShardBlockSignatures())) assert candidate.data.crosslink.data_root == compute_crosslink_data_root(blocks) From 2a2c9967a803dae41472578da9faf6bd62d8daba Mon Sep 17 00:00:00 2001 From: Age Manning Date: Fri, 9 Aug 2019 12:16:07 +1000 Subject: [PATCH 102/130] Minor corrections and clarifications to the network specification --- specs/networking/p2p-interface.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 6fca087da..84ad45022 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -114,6 +114,7 @@ This section outlines constants that are used in this spec. | Name | Value | Description | |---|---|---| | `REQ_RESP_MAX_SIZE` | `TODO` | The max size of uncompressed req/resp messages that clients will allow. 
| +| `SSZ_MAX_LIST_SIZE` | `TODO` | The max size of SSZ-encoded variable lists. | | `GOSSIP_MAX_SIZE` | `2**20` (= 1048576, 1 MiB) | The max size of uncompressed gossip messages | | `SHARD_SUBNET_COUNT` | `TODO` | The number of shard subnets used in the gossipsub protocol. | | `TTFB_TIMEOUT` | `5s` | Maximum time to wait for first byte of request response (time-to-first-byte) | @@ -195,11 +196,11 @@ Topics are post-fixed with an encoding. Encodings define how the payload of a go #### Interop -- `ssz` - All objects are SSZ-encoded. Example: The beacon block topic string is: `/beacon_block/ssz` and the data field of a gossipsub message is an ssz-encoded `BeaconBlock`. +- `ssz` - All objects are SSZ-encoded. Example: The beacon block topic string is: `/eth2/beacon_block/ssz` and the data field of a gossipsub message is an ssz-encoded `BeaconBlock`. #### Mainnet -- `ssz_snappy` - All objects are ssz-encoded and then compressed with snappy. Example: The beacon attestation topic string is: `/beacon_attestation/ssz_snappy` and the data field of a gossipsub message is an `Attestation` that has been ssz-encoded then compressed with snappy. +- `ssz_snappy` - All objects are ssz-encoded and then compressed with snappy. Example: The beacon attestation topic string is: `/eth2/beacon_attestation/ssz_snappy` and the data field of a gossipsub message is an `Attestation` that has been ssz-encoded then compressed with snappy. Implementations MUST use a single encoding. Changing an encoding will require coordination between participating implementations. @@ -286,7 +287,7 @@ The `ErrorMessage` schema is: ) ``` -*Note that the String type is encoded as UTF-8 bytes without NULL terminator when SSZ-encoded.* +*Note that the String type is encoded as UTF-8 bytes without NULL terminator when SSZ-encoded. As the `ErrorMessage` is not an SSZ-container, only the UTF-8 bytes will be sent when SSZ-encoded.* A response therefore has the form: ``` @@ -300,7 +301,8 @@ Here `result` represents the 1-byte response code. The token of the negotiated protocol ID specifies the type of encoding to be used for the req/resp interaction. Two values are possible at this time: -- `ssz`: the contents are [SSZ](https://github.com/ethereum/eth2.0-specs/blob/192442be51a8a6907d6401dffbf5c73cb220b760/specs/networking/libp2p-standardization.md#ssz-encoding) encoded. This encoding type MUST be supported by all clients. +- `ssz`: the contents are [SSZ](https://github.com/ethereum/eth2.0-specs/blob/192442be51a8a6907d6401dffbf5c73cb220b760/specs/networking/libp2p-standardization.md#ssz-encoding) encoded. This encoding type MUST be supported by all clients. + For objects containing a single field, only the field is SSZ-encoded not a container with a single field. For example, the `BeaconBlocks` response would be an SSZ-encoded list of `BeaconBlock`s. All SSZ-Lists in the Req/Resp domain will have a max-list size of `SSZ_MAX_LIST_SIZE`. - `ssz_snappy`: the contents are SSZ encoded, and subsequently compressed with [Snappy](https://github.com/google/snappy). MAY be supported in the interoperability testnet; and MUST be supported in mainnet. 
#### SSZ encoding strategy (with or without Snappy) From 5808ab3ce7d5d6385ac3082b362df8894cf168e9 Mon Sep 17 00:00:00 2001 From: JSON <49416440+JSON@users.noreply.github.com> Date: Fri, 9 Aug 2019 00:30:05 -0500 Subject: [PATCH 103/130] doc standardization for networking spec (#1338) * Update p2p-interface.md * Update p2p-interface.md * Update p2p-interface.md * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang * Update specs/networking/p2p-interface.md Co-Authored-By: Hsiao-Wei Wang --- specs/networking/p2p-interface.md | 227 +++++++++++++++--------------- 1 file changed, 113 insertions(+), 114 deletions(-) diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index 6fca087da..53e203ca6 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -1,45 +1,45 @@ -# Overview +# Ethereum 2.0 networking specification -This document contains the network specification for Ethereum 2.0 clients. +This document contains the networking specification for Ethereum 2.0 clients. It consists of four main sections: -1. A specification of the network fundamentals detailing the two network configurations: interoperability test network, and mainnet launch. -2. A specification of the three network interaction _domains_ of ETH2.0: (a) the gossip domain, (b) the discovery domain, \(c\) the Req/Resp domain. +1. A specification of the network fundamentals detailing the two network configurations: interoperability test network and mainnet launch. +2. A specification of the three network interaction *domains* of Eth 2.0: (a) the gossip domain, (b) the discovery domain, and (c) the Req/Resp domain. 3. The rationale and further explanation for the design choices made in the previous two sections. -4. An analysis of the maturity/state of the libp2p features required by this spec across the languages in which ETH 2.0 clients are being developed. +4. An analysis of the maturity/state of the libp2p features required by this spec across the languages in which Eth 2.0 clients are being developed. 
-## Table of Contents +## Table of contents -- [Network Fundamentals](#network-fundamentals) +- [Network fundamentals](#network-fundamentals) - [Transport](#transport) - [Encryption and identification](#encryption-and-identification) - - [Protocol Negotiation](#protocol-negotiation) + - [Protocol negotiation](#protocol-negotiation) - [Multiplexing](#multiplexing) -- [ETH2 network interaction domains](#eth2-network-interaction-domains) +- [Eth 2.0 network interaction domains](#eth-20-network-interaction-domains) - [Configuration](#configuration) - [The gossip domain: gossipsub](#the-gossip-domain-gossipsub) - [The Req/Resp domain](#the-reqresp-domain) - [The discovery domain: discv5](#the-discovery-domain-discv5) -- [Design Decision Rationale](#design-decision-rationale) +- [Design decision rationale](#design-decision-rationale) - [Transport](#transport-1) - [Multiplexing](#multiplexing-1) - - [Protocol Negotiation](#protocol-negotiation-1) + - [Protocol negotiation](#protocol-negotiation-1) - [Encryption](#encryption) - [Gossipsub](#gossipsub) - [Req/Resp](#reqresp) - [Discovery](#discovery) - [Compression/Encoding](#compressionencoding) -- [libp2p Implementations Matrix](#libp2p-implementations-matrix) +- [libp2p implementations matrix](#libp2p-implementations-matrix) -# Network Fundamentals +# Network fundamentals This section outlines the specification for the networking stack in Ethereum 2.0 clients. @@ -53,9 +53,9 @@ Even though libp2p is a multi-transport stack (designed to listen on multiple si All implementations MUST support the TCP libp2p transport, and it MUST be enabled for both dialing and listening (i.e. outbound and inbound connections). The libp2p TCP transport supports listening on IPv4 and IPv6 addresses (and on multiple simultaneously). -To facilitate connectivity, and avert possible IPv6 routability/support issues, clients participating in the interoperability testnet MUST expose at least ONE IPv4 endpoint. +To facilitate connectivity and avert possible IPv6 routability/support issues, clients participating in the interoperability testnet MUST expose at least ONE IPv4 endpoint. -All listening endpoints must be publicly dialable, and thus not rely on libp2p circuit relay, AutoNAT or AutoRelay facilities. +All listening endpoints must be publicly dialable, and thus not rely on libp2p circuit relay, AutoNAT, or AutoRelay facilities. Nodes operating behind a NAT, or otherwise undialable by default (e.g. container runtime, firewall, etc.), MUST have their infrastructure configured to enable inbound traffic on the announced public listening endpoint. @@ -65,7 +65,7 @@ All requirements from the interoperability testnet apply, except for the IPv4 ad At this stage, clients are licensed to drop IPv4 support if they wish to do so, cognizant of the potential disadvantages in terms of Internet-wide routability/support. Clients MAY choose to listen only on IPv6, but MUST retain capability to dial both IPv4 and IPv6 addresses. -Usage of circuit relay, AutoNAT or AutoRelay will be specifically re-examined closer to the time. +Usage of circuit relay, AutoNAT, or AutoRelay will be specifically re-examined closer to the time. ## Encryption and identification @@ -81,9 +81,9 @@ The following SecIO parameters MUST be supported by all stacks: #### Mainnet -[Noise Framework](http://www.noiseprotocol.org/) handshakes will be used for mainnet. libp2p Noise support [is in the process of being standardised](https://github.com/libp2p/specs/issues/195) in the libp2p project. 
+[Noise Framework](http://www.noiseprotocol.org/) handshakes will be used for mainnet. libp2p Noise support [is in the process of being standardized](https://github.com/libp2p/specs/issues/195) in the libp2p project. -Noise support will presumably include IX, IK and XX handshake patterns, and may rely on Curve25519 keys, ChaCha20 and Poly1305 ciphers, and SHA-256 as a hash function. These aspects are being actively debated in the referenced issue [ETH 2.0 implementers are welcome to comment and contribute to the discussion.] +Noise support will presumably include IX, IK, and XX handshake patterns, and may rely on Curve25519 keys, ChaCha20 and Poly1305 ciphers, and SHA-256 as a hash function. These aspects are being actively debated in the referenced issue (Eth 2.0 implementers are welcome to comment and contribute to the discussion). ## Protocol Negotiation @@ -91,7 +91,7 @@ Clients MUST use exact equality when negotiating protocol versions to use and MA #### Interop -Connection-level and stream-level (see the rationale section below for explanations) protocol negotiation MUST be conducted using [multistream-select v1.0](https://github.com/multiformats/multistream-select/). Its protocol ID is: `/multistream/1.0.0`. +Connection-level and stream-level (see the [Rationale](#design-decision-rationale) section below for explanations) protocol negotiation MUST be conducted using [multistream-select v1.0](https://github.com/multiformats/multistream-select/). Its protocol ID is: `/multistream/1.0.0`. #### Mainnet @@ -103,9 +103,9 @@ During connection bootstrapping, libp2p dynamically negotiates a mutually suppor Two multiplexers are commonplace in libp2p implementations: [mplex](https://github.com/libp2p/specs/tree/master/mplex) and [yamux](https://github.com/hashicorp/yamux/blob/master/spec.md). Their protocol IDs are, respectively: `/mplex/6.7.0` and `/yamux/1.0.0`. -Clients MUST support [mplex](https://github.com/libp2p/specs/tree/master/mplex) and MAY support [yamux](https://github.com/hashicorp/yamux/blob/master/spec.md). If both are supported by the client, yamux must take precedence during negotiation. See the Rationale section of this document for tradeoffs. +Clients MUST support [mplex](https://github.com/libp2p/specs/tree/master/mplex) and MAY support [yamux](https://github.com/hashicorp/yamux/blob/master/spec.md). If both are supported by the client, yamux must take precedence during negotiation. See the [Rationale](#design-decision-rationale) section below for tradeoffs. -# ETH2 network interaction domains +# Eth 2.0 network interaction domains ## Configuration @@ -113,11 +113,11 @@ This section outlines constants that are used in this spec. | Name | Value | Description | |---|---|---| -| `REQ_RESP_MAX_SIZE` | `TODO` | The max size of uncompressed req/resp messages that clients will allow. | -| `GOSSIP_MAX_SIZE` | `2**20` (= 1048576, 1 MiB) | The max size of uncompressed gossip messages | +| `REQ_RESP_MAX_SIZE` | `TODO` | The maximum size of uncompressed req/resp messages that clients will allow. | +| `GOSSIP_MAX_SIZE` | `2**20` (= 1048576, 1 MiB) | The maximum size of uncompressed gossip messages. | | `SHARD_SUBNET_COUNT` | `TODO` | The number of shard subnets used in the gossipsub protocol. | -| `TTFB_TIMEOUT` | `5s` | Maximum time to wait for first byte of request response (time-to-first-byte) | -| `RESP_TIMEOUT` | `10s` | Maximum time for complete response transfer | +| `TTFB_TIMEOUT` | `5s` | The maximum time to wait for first byte of request response (time-to-first-byte). 
| +| `RESP_TIMEOUT` | `10s` | The maximum time for complete response transfer. | ## The gossip domain: gossipsub @@ -127,7 +127,7 @@ Clients MUST support the [gossipsub](https://github.com/libp2p/specs/tree/master **Gossipsub Parameters** -*Note: Parameters listed here are subject to a large-scale network feasibility study.* +*Note*: Parameters listed here are subject to a large-scale network feasibility study. The following gossipsub [parameters](https://github.com/libp2p/specs/tree/master/pubsub/gossipsub#meshsub-an-overlay-mesh-router) will be used: @@ -142,16 +142,16 @@ The following gossipsub [parameters](https://github.com/libp2p/specs/tree/master ### Topics -Topics are plain UTF-8 strings, and are encoded on the wire as determined by protobuf (gossipsub messages are enveloped in protobuf messages). +Topics are plain UTF-8 strings and are encoded on the wire as determined by protobuf (gossipsub messages are enveloped in protobuf messages). Topic strings have form: `/eth2/TopicName/TopicEncoding`. This defines both the type of data being sent on the topic and how the data field of the message is encoded. (Further details can be found in [Messages](#Messages)). -There are two main topics used to propagate attestations and beacon blocks to all nodes on the network. Their `TopicName`'s are: +There are two main topics used to propagate attestations and beacon blocks to all nodes on the network. Their `TopicName`s are: - `beacon_block` - This topic is used solely for propagating new beacon blocks to all nodes on the networks. Blocks are sent in their entirety. Clients MUST validate the block proposer signature before forwarding it across the network. - `beacon_attestation` - This topic is used to propagate aggregated attestations (in their entirety) to subscribing nodes (typically block proposers) to be included in future blocks. Clients MUST validate that the block being voted for passes validation before forwarding the attestation on the network (TODO: [additional validations](https://github.com/ethereum/eth2.0-specs/issues/1332)). -Additional topics are used to propagate lower frequency validator messages. Their `TopicName`’s are: +Additional topics are used to propagate lower frequency validator messages. Their `TopicName`s are: - `voluntary_exit` - This topic is used solely for propagating voluntary validator exits to proposers on the network. Voluntary exits are sent in their entirety. Clients who receive a voluntary exit on this topic MUST validate the conditions within `process_voluntary_exit` before forwarding it across the network. - `proposer_slashing` - This topic is used solely for propagating proposer slashings to proposers on the network. Proposer slashings are sent in their entirety. Clients who receive a proposer slashing on this topic MUST validate the conditions within `process_proposer_slashing` before forwarding it across the network. @@ -195,11 +195,11 @@ Topics are post-fixed with an encoding. Encodings define how the payload of a go #### Interop -- `ssz` - All objects are SSZ-encoded. Example: The beacon block topic string is: `/beacon_block/ssz` and the data field of a gossipsub message is an ssz-encoded `BeaconBlock`. +- `ssz` - All objects are [SSZ-encoded](#ssz-encoding). Example: The beacon block topic string is `/beacon_block/ssz`, and the data field of a gossipsub message is an ssz-encoded `BeaconBlock`. #### Mainnet -- `ssz_snappy` - All objects are ssz-encoded and then compressed with snappy. 
Example: The beacon attestation topic string is: `/beacon_attestation/ssz_snappy` and the data field of a gossipsub message is an `Attestation` that has been ssz-encoded then compressed with snappy.
+- `ssz_snappy` - All objects are SSZ-encoded and then compressed with [Snappy](https://github.com/google/snappy). Example: The beacon attestation topic string is `/beacon_attestation/ssz_snappy`, and the data field of a gossipsub message is an `Attestation` that has been SSZ-encoded and then compressed with Snappy.

Implementations MUST use a single encoding. Changing an encoding will require coordination between participating implementations.

@@ -217,16 +217,16 @@ With:

- `ProtocolPrefix` - messages are grouped into families identified by a shared libp2p protocol name prefix. In this case, we use `/eth2/beacon_chain/req`.
- `MessageName` - each request is identified by a name consisting of English alphabet, digits and underscores (`_`).
-- `SchemaVersion` - an ordinal version number (e.g. 1, 2, 3…) Each schema is versioned to facilitate backward and forward-compatibility when possible.
-- `Encoding` - while the schema defines the data types in more abstract terms, the encoding strategy describes a specific representation of bytes that will be transmitted over the wire. See the [Encodings](#Encoding-strategies) section, for further details.
+- `SchemaVersion` - an ordinal version number (e.g. 1, 2, 3…). Each schema is versioned to facilitate backward and forward-compatibility when possible.
+- `Encoding` - while the schema defines the data types in more abstract terms, the encoding strategy describes a specific representation of bytes that will be transmitted over the wire. See the [Encodings](#Encoding-strategies) section for further details.

-This protocol segregation allows libp2p `multistream-select 1.0` / `multiselect 2.0` to handle the request type, version and encoding negotiation before establishing the underlying streams.
+This protocol segregation allows libp2p `multistream-select 1.0` / `multiselect 2.0` to handle the request type, version, and encoding negotiation before establishing the underlying streams.

### Req/Resp interaction

We use ONE stream PER request/response interaction. Streams are closed when the interaction finishes, whether in success or in error.

-Request/response messages MUST adhere to the encoding specified in the protocol name, and follow this structure (relaxed BNF grammar):
+Request/response messages MUST adhere to the encoding specified in the protocol name and follow this structure (relaxed BNF grammar):

```
request ::= <encoding-dependent-header> | <encoded-payload>
response ::= <result> | <encoding-dependent-header> | <encoded-payload>
result ::= “0” | “1” | “2” | [“128” ... “255”]
```

-The encoding-dependent header may carry metadata or assertions such as the encoded payload length, for integrity and attack proofing purposes. It is not strictly necessary to length-prefix payloads, because req/resp streams are single-use, and stream closures implicitly delimit the boundaries, but certain encodings like SSZ do, for added security.
+The encoding-dependent header may carry metadata or assertions such as the encoded payload length, for integrity and attack proofing purposes. Because req/resp streams are single-use and stream closures implicitly delimit the boundaries, it is not strictly necessary to length-prefix payloads; however, certain encodings like SSZ do, for added security.

`encoded-payload` has a maximum byte size of `REQ_RESP_MAX_SIZE`.
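To make the framing concrete, the following sketch shows how a requester might assemble an `ssz`-encoded request under this structure. It is illustrative only: `MAX_PAYLOAD_SIZE` is a stand-in for the (still `TODO`) `REQ_RESP_MAX_SIZE` value, and `frame_request` is a hypothetical helper, not part of this specification.

```python
MAX_PAYLOAD_SIZE = 2**20  # illustrative stand-in for REQ_RESP_MAX_SIZE, whose real value is still TODO


def encode_varint(n: int) -> bytes:
    """Encode an unsigned integer as a protobuf varint: 7 bits per byte, MSB set on every byte but the last."""
    out = bytearray()
    while True:
        byte = n & 0x7F
        n >>= 7
        out.append(byte | (0x80 if n else 0x00))
        if n == 0:
            return bytes(out)


def frame_request(ssz_payload: bytes) -> bytes:
    """Build <encoding-dependent-header> | <encoded-payload> for the `ssz` strategy: varint length, then bytes."""
    assert len(ssz_payload) <= MAX_PAYLOAD_SIZE
    return encode_varint(len(ssz_payload)) + ssz_payload
```

The varint header costs only one bit of overhead per byte, which is why it is preferred here over a fixed-size length field (see the length-prefixing discussion in the rationale section below).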
-Clients MUST ensure the payload size is less than or equal to `REQ_RESP_MAX_SIZE`, if not, they SHOULD reset the stream immediately. Clients tracking peer reputation MAY decrement the score of the misbehaving peer under this circumstance.
+Clients MUST ensure the payload size is less than or equal to `REQ_RESP_MAX_SIZE`; if not, they SHOULD reset the stream immediately. Clients tracking peer reputation MAY decrement the score of the misbehaving peer under this circumstance.

#### Requesting side

Once a new stream with the protocol ID for the request type has been negotiated, the full request message should be sent immediately. It should be encoded according to the encoding strategy.

-The requester MUST close the write side of the stream once it finishes writing the request message - at this point, the stream will be half-closed.
+The requester MUST close the write side of the stream once it finishes writing the request message—at this point, the stream will be half-closed.

-The requester MUST wait a maximum of `TTFB_TIMEOUT` for the first response byte to arrive (time to first byte – or TTFB – timeout). On that happening, the requester will allow further `RESP_TIMEOUT` to receive the full response.
+The requester MUST wait a maximum of `TTFB_TIMEOUT` for the first response byte to arrive (time to first byte—or TTFB—timeout). Once that happens, the requester will allow a further `RESP_TIMEOUT` to receive the full response.

If any of these timeouts fire, the requester SHOULD reset the stream and deem the req/resp operation to have failed.

@@ -257,12 +257,12 @@ Once a new stream with the protocol ID for the request type has been negotiated,

The responder MUST:

1. Use the encoding strategy to read the optional header.
-2. If there are any length assertions for length `N`, it should read exactly `N` bytes from the stream, at which point an EOF should arise (no more bytes). Should this is not the case, it should be treated as a failure.
+2. If there are any length assertions for length `N`, it should read exactly `N` bytes from the stream, at which point an EOF should arise (no more bytes). Should this not be the case, it should be treated as a failure.
3. Deserialize the expected type, and process the request.
4. Write the response (result, optional header, payload).
5. Close their write side of the stream. At this point, the stream will be fully closed.

-If steps (1), (2) or (3) fail due to invalid, malformed or inconsistent data, the responder MUST respond in error. Clients tracking peer reputation MAY record such failures, as well as unexpected events, e.g. early stream resets.
+If steps (1), (2), or (3) fail due to invalid, malformed, or inconsistent data, the responder MUST respond in error. Clients tracking peer reputation MAY record such failures, as well as unexpected events, e.g. early stream resets.

The entire request should be read in no more than `RESP_TIMEOUT`. Upon a timeout, the responder SHOULD reset the stream.

@@ -276,7 +276,7 @@ It can have one of the following values, encoded as a single unsigned byte:

Clients MAY use response codes above `128` to indicate alternative, erroneous request-specific responses.

-The range `[3, 127]` is RESERVED for future usages, and should be treated as error if not recognised expressly.
+The range `[3, 127]` is RESERVED for future usages, and should be treated as an error if not recognized expressly.
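To make the resulting control flow concrete, a minimal sketch of how a requester might bucket the 1-byte `result` before deciding whether to read an error payload. The function and its bucket names are illustrative assumptions, not part of the protocol:

```python
def classify_result(result: int) -> str:
    """Bucket the 1-byte `result` code by the ranges in the grammar above. Buckets are illustrative."""
    assert 0 <= result <= 255
    if result == 0:
        return "success"                 # assumption: `0` denotes success
    elif result in (1, 2):
        return "error"                   # assumption: `1` and `2` are the defined error responses
    elif result <= 127:
        return "error"                   # RESERVED range [3, 127]: treated as an error
    else:
        return "request_specific_error"  # [128, 255]: alternative, request-specific responses
```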
The `ErrorMessage` schema is: @@ -286,7 +286,7 @@ The `ErrorMessage` schema is: ) ``` -*Note that the String type is encoded as UTF-8 bytes without NULL terminator when SSZ-encoded.* +*Note*: The String type is encoded as UTF-8 bytes without NULL terminator when SSZ-encoded. A response therefore has the form: ``` @@ -294,22 +294,22 @@ A response therefore has the form: | result | header (opt) | encoded_response | +--------+--------+--------+--------+--------+--------+ ``` -Here `result` represents the 1-byte response code. +Here, `result` represents the 1-byte response code. ### Encoding strategies The token of the negotiated protocol ID specifies the type of encoding to be used for the req/resp interaction. Two values are possible at this time: -- `ssz`: the contents are [SSZ](https://github.com/ethereum/eth2.0-specs/blob/192442be51a8a6907d6401dffbf5c73cb220b760/specs/networking/libp2p-standardization.md#ssz-encoding) encoded. This encoding type MUST be supported by all clients. -- `ssz_snappy`: the contents are SSZ encoded, and subsequently compressed with [Snappy](https://github.com/google/snappy). MAY be supported in the interoperability testnet; and MUST be supported in mainnet. +- `ssz`: The contents are [SSZ-encoded](#ssz-encoding). This encoding type MUST be supported by all clients. +- `ssz_snappy`: The contents are SSZ-encoded and then compressed with [Snappy](https://github.com/google/snappy). MAY be supported in the interoperability testnet; MUST be supported in mainnet. -#### SSZ encoding strategy (with or without Snappy) +#### SSZ-encoding strategy (with or without Snappy) -The [SimpleSerialize (SSZ) specification](https://github.com/ethereum/eth2.0-specs/blob/192442be51a8a6907d6401dffbf5c73cb220b760/specs/simple-serialize.md) outlines how objects are SSZ-encoded. If the Snappy variant is selected, we feed the serialised form to the Snappy compressor on encoding. The inverse happens on decoding. +The [SimpleSerialize (SSZ) specification](../simple-serialize.md) outlines how objects are SSZ-encoded. If the Snappy variant is selected, we feed the serialized form to the Snappy compressor on encoding. The inverse happens on decoding. **Encoding-dependent header:** Req/Resp protocols using the `ssz` or `ssz_snappy` encoding strategies MUST prefix all encoded and compressed (if applicable) payloads with an unsigned [protobuf varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). -Note that parameters defined as `[]VariableName` are SSZ-encoded containerless vectors. +*Note*: Parameters defined as `[]VariableName` are SSZ-encoded containerless vectors. ### Messages @@ -329,10 +329,10 @@ Note that parameters defined as `[]VariableName` are SSZ-encoded containerless v ``` The fields are: -- `fork_version`: The beacon_state `Fork` version -- `finalized_root`: The latest finalized root the node knows about -- `finalized_epoch`: The latest finalized epoch the node knows about -- `head_root`: The block hash tree root corresponding to the head of the chain as seen by the sending node +- `fork_version`: The beacon_state `Fork` version. +- `finalized_root`: The latest finalized root the node knows about. +- `finalized_epoch`: The latest finalized epoch the node knows about. +- `head_root`: The block hash tree root corresponding to the head of the chain as seen by the sending node. - `head_slot`: The slot corresponding to the `head_root`. Clients exchange hello messages upon connection, forming a two-phase handshake. 
The first message the initiating client sends MUST be the hello message. In response, the receiving client MUST respond with its own hello message.

@@ -415,7 +415,7 @@ Response Content:

Requests blocks by their block roots. The response is a list of `BeaconBlock` with the same length as the request. Blocks are returned in order of the request and any missing/unknown blocks are left empty (SSZ null `BeaconBlock`).

-`RecentBeaconBlocks` is primarily used to recover recent blocks, for example when receiving a block or attestation whose parent is unknown.
+`RecentBeaconBlocks` is primarily used to recover recent blocks (e.g. when receiving a block or attestation whose parent is unknown).

Clients MUST support requesting blocks since the latest finalized epoch.

@@ -448,9 +448,9 @@ Specifications of these parameters can be found in the [ENR Specification](http:

#### Interop

-In the interoperability testnet, all peers will support all capabilities defined in this document (gossip, full Req/Resp suite, discovery protocol), therefore the ENR record does not need to carry ETH2 capability information, as it would be superfluous.
+In the interoperability testnet, all peers will support all capabilities defined in this document (gossip, full Req/Resp suite, discovery protocol), and therefore the ENR record does not need to carry Eth 2.0 capability information, as it would be superfluous.

-Nonetheless, ENRs MUST carry a generic `eth2` key with nil value, denoting that the peer is indeed an ETH2 peer, in order to eschew connecting to ETH1 peers.
+Nonetheless, ENRs MUST carry a generic `eth2` key with nil value, denoting that the peer is indeed an Eth 2.0 peer, in order to eschew connecting to Eth 1.0 peers.

#### Mainnet

@@ -466,15 +466,15 @@ This feature will not be used in the interoperability testnet.

In mainnet, we plan to use discv5’s topic advertisement feature as a rendezvous facility for peers on shards (thus subscribing to the relevant gossipsub topics).

-# Design Decision Rationale
+# Design decision rationale

## Transport

### Why are we defining specific transports?

-libp2p peers can listen on multiple transports concurrently, and these can change over time. multiaddrs not only encode the address, but also the transport to be used to dial.
+libp2p peers can listen on multiple transports concurrently, and these can change over time. Multiaddrs encode not only the address but also the transport to be used to dial.

-Due to this dynamic nature, agreeing on specific transports like TCP, QUIC or WebSockets on paper becomes irrelevant.
+Due to this dynamic nature, agreeing on specific transports like TCP, QUIC, or WebSockets on paper becomes irrelevant.

However, it is useful to define a minimum baseline for interoperability purposes.

@@ -482,34 +482,34 @@ However, it is useful to define a minimum baseline for interoperability purposes

Clients may support other transports such as libp2p QUIC, WebSockets, and WebRTC transports, if available in the language of choice. While interoperability shall not be harmed by lack of such support, the advantages are desirable:

-- better latency, performance and other QoS characteristics (QUIC).
-- paving the way for interfacing with future light clients (WebSockets, WebRTC).
+- Better latency, performance, and other QoS characteristics (QUIC).
+- Paving the way for interfacing with future light clients (WebSockets, WebRTC).
-The libp2p QUIC transport inherently relies on TLS 1.3 per requirement in section 7 of the [QUIC protocol specification](https://tools.ietf.org/html/draft-ietf-quic-transport-22#section-7), and the accompanying [QUIC-TLS document](https://tools.ietf.org/html/draft-ietf-quic-tls-22). +The libp2p QUIC transport inherently relies on TLS 1.3 per requirement in section 7 of the [QUIC protocol specification](https://tools.ietf.org/html/draft-ietf-quic-transport-22#section-7) and the accompanying [QUIC-TLS document](https://tools.ietf.org/html/draft-ietf-quic-tls-22). -The usage of one handshake procedure or the other shall be transparent to the ETH 2.0 application layer, once the libp2p Host/Node object has been configured appropriately. +The usage of one handshake procedure or the other shall be transparent to the Eth 2.0 application layer, once the libp2p Host/Node object has been configured appropriately. -### What are advantages of using TCP/QUIC/Websockets? +### What are the advantages of using TCP/QUIC/Websockets? -TCP is a reliable, ordered, full-duplex, congestion controlled network protocol that powers much of the Internet as we know it today. HTTP/1.1 and HTTP/2 run atop TCP. +TCP is a reliable, ordered, full-duplex, congestion-controlled network protocol that powers much of the Internet as we know it today. HTTP/1.1 and HTTP/2 run atop TCP. -QUIC is a new protocol that’s in the final stages of specification by the IETF QUIC WG. It emerged from Google’s SPDY experiment. The QUIC transport is undoubtedly promising. It’s UDP based yet reliable, ordered, reduces latency vs. TCP, is multiplexed, natively secure (TLS 1.3), offers stream-level and connection-level congestion control (thus removing head-of-line blocking), 0-RTT connection establishment, and endpoint migration, amongst other features. UDP also has better NAT traversal properties than TCP -- something we desperately pursue in peer-to-peer networks. +QUIC is a new protocol that’s in the final stages of specification by the IETF QUIC WG. It emerged from Google’s SPDY experiment. The QUIC transport is undoubtedly promising. It’s UDP-based yet reliable, ordered, multiplexed, natively secure (TLS 1.3), reduces latency vs. TCP, and offers stream-level and connection-level congestion control (thus removing head-of-line blocking), 0-RTT connection establishment, and endpoint migration, amongst other features. UDP also has better NAT traversal properties than TCP—something we desperately pursue in peer-to-peer networks. -QUIC is being adopted as the underlying protocol for HTTP/3. This has the potential to award us censorship resistance via deep packet inspection for free. Provided that we use the same port numbers and encryption mechanisms as HTTP/3, our traffic may be indistinguishable from standard web traffic, and we may only become subject to standard IP-based firewall filtering -- something we can counteract via other mechanisms. +QUIC is being adopted as the underlying protocol for HTTP/3. This has the potential to award us censorship resistance via deep packet inspection for free. Provided that we use the same port numbers and encryption mechanisms as HTTP/3, our traffic may be indistinguishable from standard web traffic, and we may only become subject to standard IP-based firewall filtering—something we can counteract via other mechanisms. -WebSockets and/or WebRTC transports are necessary for interaction with browsers, and will become increasingly important as we incorporate browser-based light clients to the ETH2 network. 
+WebSockets and/or WebRTC transports are necessary for interaction with browsers, and will become increasingly important as we incorporate browser-based light clients to the Eth 2.0 network. ### Why do we not just support a single transport? Networks evolve. Hardcoding design decisions leads to ossification, preventing the evolution of networks alongside the state of the art. Introducing changes on an ossified protocol is very costly, and sometimes, downright impracticable without causing undesirable breakage. -Modelling for upgradeability and dynamic transport selection from the get-go lays the foundation for a future-proof stack. +Modeling for upgradeability and dynamic transport selection from the get-go lays the foundation for a future-proof stack. -Clients can adopt new transports without breaking old ones; and the multi-transport ability enables constrained and sandboxed environments (e.g. browsers, embedded devices) to interact with the network as first-class citizens via suitable/native transports (e.g. WSS), without the need for proxying or trust delegation to servers. +Clients can adopt new transports without breaking old ones, and the multi-transport ability enables constrained and sandboxed environments (e.g. browsers, embedded devices) to interact with the network as first-class citizens via suitable/native transports (e.g. WSS), without the need for proxying or trust delegation to servers. ### Why are we not using QUIC for mainnet from the start? -The QUIC standard is still not finalised (at working draft 22 at the time of writing), and not all mainstream runtimes/languages have mature, standard, and/or fully-interoperable [QUIC support](https://github.com/quicwg/base-drafts/wiki/Implementations). One remarkable example is node.js, where the QUIC implementation is [in early development](https://github.com/nodejs/quic). +The QUIC standard is still not finalized (at working draft 22 at the time of writing), and not all mainstream runtimes/languages have mature, standard, and/or fully-interoperable [QUIC support](https://github.com/quicwg/base-drafts/wiki/Implementations). One remarkable example is node.js, where the QUIC implementation is [in early development](https://github.com/nodejs/quic). ## Multiplexing @@ -517,17 +517,17 @@ The QUIC standard is still not finalised (at working draft 22 at the time of wri [Yamux](https://github.com/hashicorp/yamux/blob/master/spec.md) is a multiplexer invented by Hashicorp that supports stream-level congestion control. Implementations exist in a limited set of languages, and it’s not a trivial piece to develop. -Conscious of that, the libp2p community conceptualised [mplex](https://github.com/libp2p/specs/blob/master/mplex/README.md) as a simple, minimal multiplexer for usage with libp2p. It does not support stream-level congestion control, and is subject to head-of-line blocking. +Conscious of that, the libp2p community conceptualized [mplex](https://github.com/libp2p/specs/blob/master/mplex/README.md) as a simple, minimal multiplexer for usage with libp2p. It does not support stream-level congestion control and is subject to head-of-line blocking. -Overlay multiplexers are not necessary with QUIC, as the protocol provides native multiplexing, but they need to be layered atop TCP, WebSockets, and other transports that lack such support. +Overlay multiplexers are not necessary with QUIC since the protocol provides native multiplexing, but they need to be layered atop TCP, WebSockets, and other transports that lack such support. 
-## Protocol Negotiation +## Protocol negotiation ### When is multiselect 2.0 due and why are we using it for mainnet? -multiselect 2.0 is currently being conceptualised. Debate started [on this issue](https://github.com/libp2p/specs/pull/95), but it got overloaded – as it tends to happen with large conceptual OSS discussions that touch the heart and core of a system. +multiselect 2.0 is currently being conceptualized. The debate started [on this issue](https://github.com/libp2p/specs/pull/95), but it got overloaded—as it tends to happen with large conceptual OSS discussions that touch the heart and core of a system. -In the following weeks (August 2019), there will be a renewed initiative to first define the requirements, constraints, assumptions and features, in order to lock in basic consensus upfront, to subsequently build on that consensus by submitting a specification for implementation. +In the following weeks (August 2019), there will be a renewed initiative to first define the requirements, constraints, assumptions, and features, in order to lock in basic consensus upfront and subsequently build on that consensus by submitting a specification for implementation. We plan to use multiselect 2.0 for mainnet because it will: @@ -563,35 +563,34 @@ SecIO is not considered secure for the purposes of this spec. ### Why are we using Noise/TLS 1.3 for mainnet? -Copied from the Noise Protocol Framework website: +Copied from the Noise Protocol Framework [website](http://www.noiseprotocol.org): > Noise is a framework for building crypto protocols. Noise protocols support mutual and optional authentication, identity hiding, forward secrecy, zero round-trip encryption, and other advanced features. Noise in itself does not specify a single handshake procedure, but provides a framework to build secure handshakes based on Diffie-Hellman key agreement with a variety of tradeoffs and guarantees. -Noise handshakes are lightweight and simple to understand, and are used in major cryptographic-centric projects like WireGuard, I2P, Lightning. [Various](https://www.wireguard.com/papers/kobeissi-bhargavan-noise-explorer-2018.pdf) [studies](https://eprint.iacr.org/2019/436.pdf) have assessed the stated security goals of several Noise handshakes with positive results. +Noise handshakes are lightweight and simple to understand, and are used in major cryptographic-centric projects like WireGuard, I2P, and Lightning. [Various](https://www.wireguard.com/papers/kobeissi-bhargavan-noise-explorer-2018.pdf) [studies](https://eprint.iacr.org/2019/436.pdf) have assessed the stated security goals of several Noise handshakes with positive results. On the other hand, TLS 1.3 is the newest, simplified iteration of TLS. Old, insecure, obsolete ciphers and algorithms have been removed, adopting Ed25519 as the sole ECDH key agreement function. Handshakes are faster, 1-RTT data is supported, and session resumption is a reality, amongst other features. -Note that [TLS 1.3 is a prerequisite of the QUIC transport](https://tools.ietf.org/html/draft-ietf-quic-transport-22#section-7), although an experiment exists to integrate Noise as the QUIC crypto layer: [nQUIC](https://eprint.iacr.org/2019/028). +*Note*: [TLS 1.3 is a prerequisite of the QUIC transport](https://tools.ietf.org/html/draft-ietf-quic-transport-22#section-7), although an experiment exists to integrate Noise as the QUIC crypto layer: [nQUIC](https://eprint.iacr.org/2019/028). ### Why are we using encryption at all? 
Transport level encryption secures message exchange and provides properties that are useful for privacy, safety, and censorship resistance. These properties are derived from the following security guarantees that apply to the entire communication between two peers:

-- Peer authentication: the peer I’m talking to is really who they claim to be, and who I expect them to be.
+- Peer authentication: the peer I’m talking to is really who they claim to be and who I expect them to be.
- Confidentiality: no observer can eavesdrop on the content of our messages.
- Integrity: the data has not been tampered with by a third-party while in transit.
- Non-repudiation: the originating peer cannot dispute that they sent the message.
- Depending on the chosen algorithms and mechanisms (e.g. continuous HMAC), we may obtain additional guarantees, such as non-replayability (this byte could’ve only been sent *now;* e.g. by using continuous HMACs), or perfect forward secrecy (in the case that a peer key is compromised, the content of a past conversation will not be compromised).

-Note that transport-level encryption is not exclusive of application-level encryption or cryptography. Transport-level encryption secures the communication itself, while application-level cryptography is necessary for the application’s use cases (e.g. signatures, randomness, etc.)
+Note that transport-level encryption is not exclusive of application-level encryption or cryptography. Transport-level encryption secures the communication itself, while application-level cryptography is necessary for the application’s use cases (e.g. signatures, randomness, etc.).

### Will mainnet networking be untested when it launches?

Before launching mainnet, the testnet will be switched over to mainnet networking parameters, including Noise handshakes, and other new protocols. This gives us an opportunity to drill coordinated network upgrades and verify that there are no significant upgradeability gaps.

-
## Gossipsub

### Why are we using a pub/sub algorithm for block and attestation propagation?

@@ -606,27 +605,27 @@ For future extensibility with almost zero overhead now (besides the extra bytes

### How do we upgrade gossip channels (e.g. changes in encoding, compression)?

-Changing gossipsub / broadcasts requires a coordinated upgrade where all clients start publishing to the new topic together, for example during a hard fork.
+Changing gossipsub/broadcasts requires a coordinated upgrade where all clients start publishing to the new topic together, for example during a hard fork.

-One can envision a two-phase deployment as well where clients start listening to the new topic in a first phase then start publishing some time later, letting the traffic naturally move over to the new topic.
+One can envision a two-phase deployment as well where clients start listening to the new topic in the first phase then start publishing some time later, letting the traffic naturally move over to the new topic.

### Why must all clients use the same gossip topic instead of one negotiated between each peer pair?

-Supporting multiple topics / encodings would require the presence of relayers to translate between encodings and topics so as to avoid network fragmentation where participants have diverging views on the gossiped state, making the protocol more complicated and fragile.
+Supporting multiple topics/encodings would require the presence of relayers to translate between encodings and topics so as to avoid network fragmentation where participants have diverging views on the gossiped state, making the protocol more complicated and fragile.

-Gossip protocols typically remember what messages they've seen for a finite period of time based on message identity - if you publish the same message again after that time has passed, it will be re-broadcast - adding a relay delay also makes this scenario more likely.
+Gossip protocols typically remember what messages they've seen for a finite period of time, based on message identity—if you publish the same message again after that time has passed, it will be re-broadcast—adding a relay delay also makes this scenario more likely.

-One can imagine that in a complicated upgrade scenario, we might have peers publishing the same message on two topics/encodings, but the price here is pretty high in terms of overhead - both computational and networking, so we'd rather avoid that.
+One can imagine that in a complicated upgrade scenario, we might have peers publishing the same message on two topics/encodings, but the price here is pretty high in terms of overhead—both computational and networking—so we'd rather avoid that.

It is permitted for clients to publish data on alternative topics as long as they also publish on the network-wide mandatory topic.

### Why are the topics strings and not hashes?

-Topics names have a hierarchical structure. In the future, gossipsub may support wildcard subscriptions (e.g. subscribe to all children topics under a root prefix) by way of prefix matching. Enforcing hashes for topic names would preclude us from leveraging such features going forward.
+Topic names have a hierarchical structure. In the future, gossipsub may support wildcard subscriptions (e.g. subscribe to all children topics under a root prefix) by way of prefix matching. Enforcing hashes for topic names would preclude us from leveraging such features going forward.

No security or privacy guarantees are lost as a result of choosing plaintext topic names, since the domain is finite anyway, and calculating a digest's preimage would be trivial.

-Furthermore, the ETH2 topic names are shorter than their digest equivalents (assuming SHA-256 hash), so hashing topics would bloat messages unnecessarily.
+Furthermore, the Eth 2.0 topic names are shorter than their digest equivalents (assuming SHA-256 hash), so hashing topics would bloat messages unnecessarily.

### Why are there `SHARD_SUBNET_COUNT` subnets, and why is this not defined?

@@ -642,7 +641,7 @@ The prohibition of unverified-block-gossiping extends to nodes that cannot verif

### How are we going to discover peers in a gossipsub topic?

-Via discv5 topics. ENRs should not be used for this purpose, as they store identity, location and capability info, not volatile [advertisements](#topic-advertisement).
+Via discv5 topics. ENRs should not be used for this purpose, as they store identity, location, and capability information, not volatile [advertisements](#topic-advertisement).

In the interoperability testnet, all peers will be subscribed to all global beacon chain topics, so discovering peers in specific shard topics will be unnecessary.

@@ -652,23 +651,23 @@ In the interoperability testnet, all peers will be subscribed to all global beac

Requests are segregated by protocol ID to:

-1. 
Leverage protocol routing in libp2p, such that the libp2p stack will route the incoming stream to the appropriate handler. This allows each the handler function for each request type to be self-contained. For an analogy, think about how you attach HTTP handlers to a REST API server. +1. Leverage protocol routing in libp2p, such that the libp2p stack will route the incoming stream to the appropriate handler. This allows the handler function for each request type to be self-contained. For an analogy, think about how you attach HTTP handlers to a REST API server. 2. Version requests independently. In a coarser-grained umbrella protocol, the entire protocol would have to be versioned even if just one field in a single message changed. 3. Enable clients to select the individual requests/versions they support. It would no longer be a strict requirement to support all requests, and clients, in principle, could support a subset of requests and variety of versions. 4. Enable flexibility and agility for clients adopting spec changes that impact the request, by signalling to peers exactly which subset of new/old requests they support. 5. Enable clients to explicitly choose backwards compatibility at the request granularity. Without this, clients would be forced to support entire versions of the coarser request protocol. -6. Parallelise RFCs (or ETH2 EIPs). By decoupling requests from one another, each RFC that affects the request protocol can be deployed/tested/debated independently without relying on a synchronisation point to version the general top-level protocol. +6. Parallelise RFCs (or Eth 2.0 EIPs). By decoupling requests from one another, each RFC that affects the request protocol can be deployed/tested/debated independently without relying on a synchronization point to version the general top-level protocol. 1. This has the benefit that clients can explicitly choose which RFCs to deploy without buying into all other RFCs that may be included in that top-level version. 2. Affording this level of granularity with a top-level protocol would imply creating as many variants (e.g. /protocol/43-{a,b,c,d,...}) as the cartesian product of RFCs inflight, O(n^2). 7. Allow us to simplify the payload of requests. Request-id’s and method-ids no longer need to be sent. The encoding/request type and version can all be handled by the framework. -CAVEAT: the protocol negotiation component in the current version of libp2p is called multistream-select 1.0. It is somewhat naïve and introduces overhead on every request when negotiating streams, although implementation-specific optimizations are possible to save this cost. Multiselect 2.0 will remove this overhead by memoizing previously selected protocols, and modelling shared protocol tables. Fortunately this req/resp protocol is not the expected network bottleneck in the protocol so the additional overhead is not expected to hinder interop testing. More info is to be released from the libp2p community in the coming weeks. +**Caveat**: The protocol negotiation component in the current version of libp2p is called multistream-select 1.0. It is somewhat naïve and introduces overhead on every request when negotiating streams, although implementation-specific optimizations are possible to save this cost. Multiselect 2.0 will remove this overhead by memoizing previously selected protocols, and modeling shared protocol tables. 
Fortunately, this req/resp protocol is not the expected network bottleneck in the protocol so the additional overhead is not expected to hinder interop testing. More info is to be released from the libp2p community in the coming weeks. -### Why are messages length-prefixed with a protobuf varint in the SSZ encoding? +### Why are messages length-prefixed with a protobuf varint in the SSZ-encoding? -We are using single-use streams where each stream is closed at the end of the message - thus libp2p transparently handles message delimiting in the underlying stream. libp2p streams are full-duplex, and each party is responsible for closing their write side (like in TCP). We can therefore use stream closure to mark the end of the request and response independently. +We are using single-use streams where each stream is closed at the end of the message. Thus, libp2p transparently handles message delimiting in the underlying stream. libp2p streams are full-duplex, and each party is responsible for closing their write side (like in TCP). We can therefore use stream closure to mark the end of the request and response independently. -Nevertheless, messages are still length-prefixed - this is now being considered for removal. +Nevertheless, messages are still length-prefixed—this is now being considered for removal. Advantages of length-prefixing include: @@ -678,17 +677,17 @@ Advantages of length-prefixing include: Disadvantages include: -* Redundant methods of message delimiting - both stream end marker and length prefix +* Redundant methods of message delimiting—both stream end marker and length prefix * Harder to stream as length must be known up-front * Additional code path required to verify length -In some protocols, adding a length prefix serves as a form of DoS protection against very long messages, allowing the client to abort if an overlong message is about to be sent. In this protocol, we are globally limiting message sizes using `REQ_RESP_MAX_SIZE`, thus an the length prefix does not afford any additional protection. +In some protocols, adding a length prefix serves as a form of DoS protection against very long messages, allowing the client to abort if an overlong message is about to be sent. In this protocol, we are globally limiting message sizes using `REQ_RESP_MAX_SIZE`, thus the length prefix does not afford any additional protection. [Protobuf varint](https://developers.google.com/protocol-buffers/docs/encoding#varints) is an efficient technique to encode variable-length ints. Instead of reserving a fixed-size field of as many bytes as necessary to convey the maximum possible value, this field is elastic in exchange for 1-bit overhead per byte. ### Why do we version protocol strings with ordinals instead of semver? -Using semver for network protocols is confusing. It is never clear what a change in a field, even if backwards compatible on deserialisation, actually implies. Network protocol agreement should be explicit. Imagine two peers: +Using semver for network protocols is confusing. It is never clear what a change in a field, even if backwards compatible on deserialization, actually implies. Network protocol agreement should be explicit. Imagine two peers: - Peer A supporting v1.1.1 of protocol X. - Peer B supporting v1.1.2 of protocol X. @@ -697,9 +696,9 @@ These two peers should never speak to each other because the results can be unpr For this reason, we rely on negotiation of explicit, verbatim protocols. 
In the above case, peer B would provide backwards compatibility by supporting and advertising both v1.1.1 and v1.1.2 of the protocol. -Therefore, semver would be relegated to convey expectations at the human level, and it wouldn't do a good job there either, because it's unclear if "backwards-compatibility" and "breaking change" apply only to wire schema level, to behaviour, etc. +Therefore, semver would be relegated to convey expectations at the human level, and it wouldn't do a good job there either, because it's unclear if "backwards compatibility" and "breaking change" apply only to wire schema level, to behavior, etc. -For this reason, we remove semver out of the picture and replace it with ordinals that require explicit agreement and do not mandate a specific policy for changes. +For this reason, we remove and replace semver with ordinals that require explicit agreement and do not mandate a specific policy for changes. ### Why is it called Req/Resp and not RPC? @@ -713,7 +712,7 @@ discv5 is a standalone protocol, running on UDP on a dedicated port, meant for p On the other hand, libp2p Kademlia DHT is a fully-fledged DHT protocol/implementation with content routing and storage capabilities, both of which are irrelevant in this context. -We assume that ETH1 nodes will evolve to support discv5. By sharing the discovery network between ETH1 and ETH2, we benefit from the additive effect on network size that enhances resilience and resistance against certain attacks, to which smaller networks are more vulnerable. It should also assist light clients of both networks find nodes with specific capabilities. +We assume that Eth 1.0 nodes will evolve to support discv5. By sharing the discovery network between Eth 1.0 and 2.0, we benefit from the additive effect on network size that enhances resilience and resistance against certain attacks, to which smaller networks are more vulnerable. It should also help light clients of both networks find nodes with specific capabilities. discv5 is in the process of being audited. @@ -723,41 +722,41 @@ Ethereum Node Records are self-certified node records. Nodes craft and dissemina ENRs are key-value records with string-indexed ASCII keys. They can store arbitrary information, but EIP-778 specifies a pre-defined dictionary, including IPv4 and IPv6 addresses, secp256k1 public keys, etc. -Comparing ENRs and multiaddrs is like comparing apples and bananas. ENRs are self-certified containers of identity, addresses, and metadata about a node. Multiaddrs are address strings with the peculiarity that they’re self-describing, composable and future-proof. An ENR can contain multiaddrs, and multiaddrs can be derived securely from the fields of an authenticated ENR. +Comparing ENRs and multiaddrs is like comparing apples and oranges. ENRs are self-certified containers of identity, addresses, and metadata about a node. Multiaddrs are address strings with the peculiarity that they’re self-describing, composable and future-proof. An ENR can contain multiaddrs, and multiaddrs can be derived securely from the fields of an authenticated ENR. discv5 uses ENRs and we will presumably need to: 1. Add `multiaddr` to the dictionary, so that nodes can advertise their multiaddr under a reserved namespace in ENRs. – and/or – -2. Define a bi-directional conversion function between multiaddrs and the corresponding denormalized fields in an ENR (ip, ip6, tcp, tcp6, etc.), for compatibility with nodes that do not support multiaddr natively (e.g. ETH1 nodes). +2. 
Define a bi-directional conversion function between multiaddrs and the corresponding denormalized fields in an ENR (ip, ip6, tcp, tcp6, etc.), for compatibility with nodes that do not support multiaddr natively (e.g. Eth 1.0 nodes).

## Compression/Encoding

### Why are we using SSZ for encoding?

-SSZ is used at the consensus layer and all implementations should have support for ssz encoding/decoding requiring no further dependencies to be added to client implementations. This is a natural choice for serializing objects to be sent across the wire. The actual data in most protocols will be further compressed for efficiency.
+SSZ is used at the consensus layer, and all implementations should have support for SSZ-encoding/decoding, requiring no further dependencies to be added to client implementations. This is a natural choice for serializing objects to be sent across the wire. The actual data in most protocols will be further compressed for efficiency.

-SSZ has well defined schema’s for consensus objects (typically sent across the wire) reducing any serialization schema data that needs to be sent. It also has defined all required types that are required for this network specification.
+SSZ has well-defined schemas for consensus objects (typically sent across the wire), reducing any serialization schema data that needs to be sent. It also defines all the types required by this network specification.

### Why are we compressing, and at which layers?

-We compress on the wire to achieve smaller payloads per-message, which, in aggregate, result in higher efficiency, better utilisation of available bandwidth, and overall reduction in network-wide traffic overhead.
+We compress on the wire to achieve smaller payloads per message, which, in aggregate, result in higher efficiency, better utilization of available bandwidth, and overall reduction in network-wide traffic overhead.

-At this time, libp2p does not have an out-of-the-box compression feature that can be dynamically negotiated and layered atop connections and streams, but is [being considered](https://github.com/libp2p/libp2p/issues/81).
+At this time, libp2p does not have an out-of-the-box compression feature that can be dynamically negotiated and layered atop connections and streams, but it is [being considered](https://github.com/libp2p/libp2p/issues/81).

-This is a non-trivial feature because the behaviour of network IO loops, kernel buffers, chunking, packet fragmentation, amongst others, need to be taken into account. libp2p streams are unbounded streams, whereas compression algorithms work best on bounded byte streams of which we have some prior knowledge.
+This is a non-trivial feature because the behavior of network IO loops, kernel buffers, chunking, and packet fragmentation, amongst others, need to be taken into account. libp2p streams are unbounded streams, whereas compression algorithms work best on bounded byte streams of which we have some prior knowledge.

-Compression tends not to be a one-size-fits-all problem. Lots of variables need careful evaluation, and generic approaches/choices lead to poor size shavings, which may even be counterproductive when factoring in the CPU and memory tradeoff.
+Compression tends not to be a one-size-fits-all problem. A lot of variables need careful evaluation, and generic approaches/choices lead to poor size shavings, which may even be counterproductive when factoring in the CPU and memory tradeoff.
For all these reasons, generically negotiating compression algorithms may be treated as a research problem within the libp2p community, one we’re happy to tackle in the medium term.

At this stage, the wisest choice is to consider libp2p a messenger of bytes, and to make the application layer participate in compressing those bytes.

This looks different depending on the interaction layer:

-- Gossip domain: since gossipsub has a framing protocol and exposes an API, we compress the payload (when dictated by the encoding token in the topic name) prior to publishing the message via the API. No length prefixing is necessary because protobuf takes care of bounding the field in the serialised form.
+- Gossip domain: since gossipsub has a framing protocol and exposes an API, we compress the payload (when dictated by the encoding token in the topic name) prior to publishing the message via the API. No length prefixing is necessary because protobuf takes care of bounding the field in the serialized form.
- Req/Resp domain: since we define custom protocols that operate on byte streams, implementers are encouraged to encapsulate the encoding and compression logic behind MessageReader and MessageWriter components/strategies that can be layered on top of the raw byte streams.

### Why are we using Snappy for compression?

-Snappy is used in Ethereum 1.0. It is well maintained by Google, has good benchmarks and can calculate the size of the uncompressed object without inflating it in memory. This prevents DOS vectors where large uncompressed data is sent.
+Snappy is used in Ethereum 1.0. It is well maintained by Google, has good benchmarks, and can calculate the size of the uncompressed object without inflating it in memory. This prevents DOS vectors where large uncompressed data is sent.

### Can I get access to unencrypted bytes on the wire for debugging purposes?

@@ -767,6 +766,6 @@ If your libp2p library relies on frameworks/runtimes such as Netty (jvm) or Node

For specific ad-hoc testing scenarios, you can use the [plaintext/2.0.0 secure channel](https://github.com/libp2p/specs/blob/master/plaintext/README.md) (which is essentially no-op encryption or message authentication), in combination with tcpdump or Wireshark to inspect the wire.

-# libp2p Implementations Matrix
+# libp2p implementations matrix

-This section will soon contain a matrix showing the maturity/state of the libp2p features required by this spec across the languages in which ETH 2.0 clients are being developed.
+This section will soon contain a matrix showing the maturity/state of the libp2p features required by this spec across the languages in which Eth 2.0 clients are being developed.

From 0e7287eda5ea7601707a5a4e2167f98fab699644 Mon Sep 17 00:00:00 2001
From: Jim McDonald
Date: Fri, 9 Aug 2019 19:09:04 +0100
Subject: [PATCH 104/130] Add link from DepositData reference to definition

---
 specs/core/0_deposit-contract.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specs/core/0_deposit-contract.md b/specs/core/0_deposit-contract.md
index af81c6bec..ade1006a0 100644
--- a/specs/core/0_deposit-contract.md
+++ b/specs/core/0_deposit-contract.md
@@ -38,7 +38,7 @@ The initial deployment phases of Ethereum 2.0 are implemented without consensus

### `deposit` function

-The deposit contract has a public `deposit` function to make deposits. It takes as arguments `pubkey: bytes[48], withdrawal_credentials: bytes[32], signature: bytes[96]` corresponding to a `DepositData` object.
+The deposit contract has a public `deposit` function to make deposits. It takes as arguments `pubkey: bytes[48], withdrawal_credentials: bytes[32], signature: bytes[96]` corresponding to a [`DepositData`](./0_beacon-chain.md#depositdata) object. #### Deposit amount From 5290b62465379f33ff15361a12eb2811d41d4832 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sun, 11 Aug 2019 22:21:58 +0800 Subject: [PATCH 105/130] Fix + refactor `is_valid_beacon_attestation` and add basic test --- specs/core/1_shard-data-chains.md | 21 ++++---- .../test_beacon_attestation.py | 48 +++++++++++++++++++ 2 files changed, 59 insertions(+), 10 deletions(-) create mode 100644 test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_beacon_attestation.py diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md index 283c1a9ca..079c0b4b7 100644 --- a/specs/core/1_shard-data-chains.md +++ b/specs/core/1_shard-data-chains.md @@ -555,16 +555,14 @@ def shard_block_transition(state: ShardState, Let: -- `shard` be a valid `Shard` - `pre_state` is the `ShardState` before processing any blocks -- `shard_blocks_or_state_roots` be the `Union[ShardBlock, Hash]` list such that `shard_blocks[slot]` is the canonical `ShardBlock` for shard `shard` at slot `slot` if a block exists, or the post-state-root of processing state up to and including that slot if a block does not exist. +- `shard_blocks_or_state_roots` be the `Union[ShardBlock, Hash]` list such that `shard_blocks[slot]` is the canonical `ShardBlock` for shard `pre_state.shard` at slot `slot` if a block exists, or the post-state-root of processing state up to and including that slot if a block does not exist. - `beacon_state` be the canonical `BeaconState` - `valid_attestations` be the set of valid `Attestation` objects, recursively defined - `candidate` be a candidate `Attestation` which is valid under Phase 0 rules, and for which validity is to be determined under Phase 1 rules by running `is_valid_beacon_attestation` ```python -def is_valid_beacon_attestation(shard: Shard, - pre_state: ShardState, +def is_valid_beacon_attestation(pre_state: ShardState, shard_blocks_or_state_roots: Sequence[Union[ShardBlock, Hash]], beacon_state: BeaconState, valid_attestations: Set[Attestation], @@ -587,7 +585,7 @@ def is_valid_beacon_attestation(shard: Shard, assert candidate.data.previous_attestation.epoch < compute_epoch_of_slot(candidate.data.slot) # Check crosslink data root - start_epoch = beacon_state.crosslinks[shard].epoch + start_epoch = beacon_state.crosslinks[pre_state.shard].epoch end_epoch = min(compute_epoch_of_slot(candidate.data.slot) - CROSSLINK_LOOKBACK, start_epoch + MAX_EPOCHS_PER_CROSSLINK) blocks = [] @@ -595,11 +593,14 @@ def is_valid_beacon_attestation(shard: Shard, if isinstance(shard_blocks_or_state_roots[slot], ShardBlock): blocks.append(shard_blocks_or_state_roots[slot]) else: - blocks.append(ShardBlockHeader(ShardBlockCore( - slot=slot, - state_root=shard_blocks_or_state_roots[slot], - total_bytes=pre_state.total_bytes - ), ShardBlockSignatures())) + blocks.append(ShardBlock( + core=ExtendedShardBlockCore( + slot=slot, + state_root=shard_blocks_or_state_roots[slot], + total_bytes=pre_state.total_bytes, + ), + signatures=ShardBlockSignatures(), + )) assert candidate.data.crosslink.data_root == compute_crosslink_data_root(blocks) return True diff --git a/test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_beacon_attestation.py b/test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_beacon_attestation.py new 
file mode 100644 index 000000000..aface905b --- /dev/null +++ b/test_libs/pyspec/eth2spec/test/phase_1/shard_data_chain/test_beacon_attestation.py @@ -0,0 +1,48 @@ +from eth2spec.test.context import ( + with_all_phases_except, + spec_state_test, + always_bls, +) +from eth2spec.test.helpers.phase1.shard_block import ( + build_empty_shard_block, +) +from eth2spec.test.helpers.attestations import get_valid_attestation + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_process_empty_shard_block(spec, state): + beacon_state = state + + shard_slot = spec.PHASE_1_FORK_SLOT + beacon_state.slot = spec.Slot(spec.PHASE_1_FORK_EPOCH * spec.SLOTS_PER_EPOCH) + shard_state = spec.get_default_shard_state(beacon_state, shard=spec.Shard(0)) + shard_state.slot = shard_slot + + block = build_empty_shard_block( + spec, + shard_state, + beacon_state, + slot=shard_slot + 1, + parent_root=spec.Hash(), + signed=True, + full_attestation=True, + ) + + yield 'pre', shard_state + yield 'beacon_state', beacon_state + yield 'block', block + + beacon_attestation = get_valid_attestation(spec, beacon_state, signed=True) + yield 'beacon_attestation', beacon_attestation + + is_valid_beacon_attestation = spec.is_valid_beacon_attestation( + pre_state=shard_state, + shard_blocks_or_state_roots=(block,), + beacon_state=beacon_state, + valid_attestations=set([beacon_attestation]), + candidate=beacon_attestation, + ) + assert is_valid_beacon_attestation + yield 'is_valid_beacon_attestation', is_valid_beacon_attestation From b345dc0f5fc3aa9b7ded17e04c004f357fbf6389 Mon Sep 17 00:00:00 2001 From: dankrad Date: Sun, 11 Aug 2019 10:05:17 -0700 Subject: [PATCH 106/130] Legendre custody construction (#1305) * Stab at Legendre custody construction + some tests * Fix some problems and fix function puller to remove phase0 only lines in phase1 * Pass the linter * Add headings * Fix domain for BLS stub * Change Jacobi to Legendre * n -> q to clarify notation * Headings * Another missing heading * Custody subchunks via padding * Fix max_reveal_lateness stuff * Better names for reveal period functions * Better parametrization of max_reveal_lateness computation and tests for custody reveal processing * Fix linter * Allow challenging for one period after the custody reveal, shorter periods for minimal tests * Fix lint * Fix linter error --- configs/constant_presets/minimal.yaml | 4 + scripts/build_spec.py | 21 +- specs/core/0_beacon-chain.md | 9 +- specs/core/1_custody-game.md | 176 ++++++--- .../test/fork_choice/test_on_attestation.py | 4 +- .../pyspec/eth2spec/test/helpers/custody.py | 132 ++++++- .../pyspec/eth2spec/test/helpers/deposits.py | 2 +- .../test_process_attestation.py | 2 +- .../test_process_bit_challenge.py | 347 ++++++++++++++++++ .../test_process_custody_key_reveal.py | 118 ++++++ ...est_process_early_derived_secret_reveal.py | 18 +- test_libs/pyspec/eth2spec/utils/bls.py | 6 + .../pyspec/eth2spec/utils/merkle_minimal.py | 11 +- 13 files changed, 777 insertions(+), 73 deletions(-) create mode 100644 test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_bit_challenge.py create mode 100644 test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_custody_key_reveal.py diff --git a/configs/constant_presets/minimal.yaml b/configs/constant_presets/minimal.yaml index ab8aab3c4..15b749b9d 100644 --- a/configs/constant_presets/minimal.yaml +++ b/configs/constant_presets/minimal.yaml @@ -74,6 +74,10 @@ MAX_EPOCHS_PER_CROSSLINK: 4 MIN_EPOCHS_TO_INACTIVITY_PENALTY: 4 # [customized] 
2**12 (= 4,096) epochs EARLY_DERIVED_SECRET_PENALTY_MAX_FUTURE_EPOCHS: 4096 +# 2**2 (= 4) epochs +EPOCHS_PER_CUSTODY_PERIOD: 4 +# 2**2 (= 4) epochs +CUSTODY_PERIOD_TO_RANDAO_PADDING: 4 # State vector lengths diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 52642c8f4..133834429 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -58,10 +58,15 @@ from eth2spec.utils.bls import ( bls_aggregate_pubkeys, bls_verify, bls_verify_multiple, + bls_signature_to_G2, ) from eth2spec.utils.hash_function import hash ''' +SUNDRY_CONSTANTS_FUNCTIONS = ''' +def ceillog2(x: uint64) -> int: + return (x - 1).bit_length() +''' SUNDRY_FUNCTIONS = ''' # Monkey patch hash cache _hash = hash @@ -111,6 +116,13 @@ def apply_constants_preset(preset: Dict[str, Any]) -> None: ''' +def remove_for_phase1(functions: Dict[str, str]): + for key, value in functions.items(): + lines = value.split("\n") + lines = filter(lambda s: "[to be removed in phase 1]" not in s, lines) + functions[key] = "\n".join(lines) + + def strip_comments(raw: str) -> str: comment_line_regex = re.compile(r'^\s+# ') lines = raw.split('\n') @@ -141,10 +153,15 @@ def objects_to_spec(functions: Dict[str, str], ] ) ) + for k in list(functions): + if "ceillog2" in k: + del functions[k] functions_spec = '\n\n'.join(functions.values()) for k in list(constants.keys()): if k.startswith('DOMAIN_'): constants[k] = f"DomainType(({constants[k]}).to_bytes(length=4, byteorder='little'))" + if k == "BLS12_381_Q": + constants[k] += " # noqa: E501" constants_spec = '\n'.join(map(lambda x: '%s = %s' % (x, constants[x]), constants)) ssz_objects_instantiation_spec = '\n\n'.join(ssz_objects.values()) ssz_objects_reinitialization_spec = ( @@ -157,6 +174,7 @@ def objects_to_spec(functions: Dict[str, str], spec = ( imports + '\n\n' + new_type_definitions + + '\n' + SUNDRY_CONSTANTS_FUNCTIONS + '\n\n' + constants_spec + '\n\n\n' + ssz_objects_instantiation_spec + '\n\n' + functions_spec @@ -186,7 +204,7 @@ ignored_dependencies = [ 'bit', 'boolean', 'Vector', 'List', 'Container', 'Hash', 'BLSPubkey', 'BLSSignature', 'Bytes', 'BytesN' 'Bytes1', 'Bytes4', 'Bytes32', 'Bytes48', 'Bytes96', 'Bitlist', 'Bitvector', 'uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'uint256', - 'bytes' # to be removed after updating spec doc + 'bytes', 'byte', 'BytesN' # to be removed after updating spec doc ] @@ -268,6 +286,7 @@ def build_phase1_spec(phase0_sourcefile: str, fork_choice_sourcefile: str, outfile: str=None) -> Optional[str]: phase0_spec = get_spec(phase0_sourcefile) + remove_for_phase1(phase0_spec[0]) phase1_custody = get_spec(phase1_custody_sourcefile) phase1_shard_data = get_spec(phase1_shard_sourcefile) fork_choice_spec = get_spec(fork_choice_sourcefile) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index f0169f1d2..1eddae58e 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -660,8 +660,8 @@ def is_valid_indexed_attestation(state: BeaconState, indexed_attestation: Indexe bit_1_indices = indexed_attestation.custody_bit_1_indices # Verify no index has custody bit equal to 1 [to be removed in phase 1] - if not len(bit_1_indices) == 0: - return False + if not len(bit_1_indices) == 0: # [to be removed in phase 1] + return False # [to be removed in phase 1] # Verify max number of indices if not len(bit_0_indices) + len(bit_1_indices) <= MAX_VALIDATORS_PER_COMMITTEE: return False @@ -1661,6 +1661,11 @@ def process_attestation(state: BeaconState, attestation: Attestation) -> None: 
proposer_index=get_beacon_proposer_index(state), ) + # Check bitlist lengths + committee_size = get_committee_count(state, attestation.data.target.epoch) + assert len(attestation.aggregation_bits) == committee_size + assert len(attestation.custody_bits) == committee_size + if data.target.epoch == get_current_epoch(state): assert data.source == state.current_justified_checkpoint parent_crosslink = state.current_crosslinks[data.crosslink.shard] diff --git a/specs/core/1_custody-game.md b/specs/core/1_custody-game.md index f79977442..3e0a38102 100644 --- a/specs/core/1_custody-game.md +++ b/specs/core/1_custody-game.md @@ -12,6 +12,7 @@ - [Terminology](#terminology) - [Constants](#constants) - [Misc](#misc) + - [Custody game parameters](#custody-game-parameters) - [Time parameters](#time-parameters) - [Max operations per block](#max-operations-per-block) - [Reward and penalty quotients](#reward-and-penalty-quotients) @@ -33,12 +34,14 @@ - [`BeaconBlockBody`](#beaconblockbody) - [Helpers](#helpers) - [`ceillog2`](#ceillog2) + - [`is_valid_merkle_branch_with_mixin`](#is_valid_merkle_branch_with_mixin) - [`get_crosslink_chunk_count`](#get_crosslink_chunk_count) - - [`get_bit`](#get_bit) + - [`legendre_bit`](#legendre_bit) + - [`custody_subchunkify`](#custody_subchunkify) - [`get_custody_chunk_bit`](#get_custody_chunk_bit) - [`get_chunk_bits_root`](#get_chunk_bits_root) - [`get_randao_epoch_for_custody_period`](#get_randao_epoch_for_custody_period) - - [`get_reveal_period`](#get_reveal_period) + - [`get_custody_period_for_validator`](#get_custody_period_for_validator) - [`replace_empty_or_append`](#replace_empty_or_append) - [Per-block processing](#per-block-processing) - [Operations](#operations) @@ -75,11 +78,20 @@ This document details the beacon chain additions and changes in Phase 1 of Ether ### Misc +| `BLS12_381_Q` | `4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787` | +| `MINOR_REWARD_QUOTIENT` | `2**8` (= 256) | + +### Custody game parameters + | Name | Value | | - | - | | `BYTES_PER_SHARD_BLOCK` | `2**14` (= 16,384) | | `BYTES_PER_CUSTODY_CHUNK` | `2**9` (= 512) | -| `MINOR_REWARD_QUOTIENT` | `2**8` (= 256) | +| `BYTES_PER_CUSTODY_SUBCHUNK` | `48` | +| `CHUNKS_PER_EPOCH` | `2 * BYTES_PER_SHARD_BLOCK * SLOTS_PER_EPOCH // BYTES_PER_CUSTODY_CHUNK` | +| `MAX_CUSTODY_CHUNKS` | `MAX_EPOCHS_PER_CROSSLINK * CHUNKS_PER_EPOCH` | +| `CUSTODY_DATA_DEPTH` | `ceillog2(MAX_CUSTODY_CHUNKS) + 1` | +| `CUSTODY_CHUNK_BIT_DEPTH` | `ceillog2(MAX_EPOCHS_PER_CROSSLINK * CHUNKS_PER_EPOCH // 256) + 2` | ### Time parameters @@ -144,7 +156,7 @@ class CustodyBitChallenge(Container): attestation: Attestation challenger_index: ValidatorIndex responder_key: BLSSignature - chunk_bits: Bytes[PLACEHOLDER] + chunk_bits: Bitlist[MAX_CUSTODY_CHUNKS] signature: BLSSignature ``` @@ -181,10 +193,10 @@ class CustodyBitChallengeRecord(Container): class CustodyResponse(Container): challenge_index: uint64 chunk_index: uint64 - chunk: Vector[Bytes[PLACEHOLDER], BYTES_PER_CUSTODY_CHUNK] - data_branch: List[Hash, PLACEHOLDER] - chunk_bits_branch: List[Hash, PLACEHOLDER] - chunk_bits_leaf: Hash + chunk: BytesN[BYTES_PER_CUSTODY_CHUNK] + data_branch: List[Hash, CUSTODY_DATA_DEPTH] + chunk_bits_branch: List[Hash, CUSTODY_CHUNK_BIT_DEPTH] + chunk_bits_leaf: Bitvector[256] ``` ### New beacon operations @@ -225,11 +237,11 @@ Add the following fields to the end of the specified container objects. 
Fields w ```python class Validator(Container): - # next_custody_reveal_period is initialised to the custody period + # next_custody_secret_to_reveal is initialised to the custody period # (of the particular validator) in which the validator is activated - # = get_reveal_period(...) - next_custody_reveal_period: uint64 - max_reveal_lateness: uint64 + # = get_custody_period_for_validator(...) + next_custody_secret_to_reveal: uint64 + max_reveal_lateness: Epoch ``` #### `BeaconState` @@ -263,7 +275,26 @@ class BeaconBlockBody(Container): ```python def ceillog2(x: uint64) -> int: - return x.bit_length() + return (x - 1).bit_length() +``` + +### `is_valid_merkle_branch_with_mixin` + +```python +def is_valid_merkle_branch_with_mixin(leaf: Hash, + branch: Sequence[Hash], + depth: uint64, + index: uint64, + root: Hash, + mixin: uint64) -> bool: + value = leaf + for i in range(depth): + if index // (2**i) % 2: + value = hash(branch[i] + value) + else: + value = hash(value + branch[i]) + value = hash(value + mixin.to_bytes(32, "little")) + return value == root ``` ### `get_crosslink_chunk_count` @@ -271,37 +302,69 @@ def ceillog2(x: uint64) -> int: ```python def get_custody_chunk_count(crosslink: Crosslink) -> int: crosslink_length = min(MAX_EPOCHS_PER_CROSSLINK, crosslink.end_epoch - crosslink.start_epoch) - chunks_per_epoch = 2 * BYTES_PER_SHARD_BLOCK * SLOTS_PER_EPOCH // BYTES_PER_CUSTODY_CHUNK - return crosslink_length * chunks_per_epoch + return crosslink_length * CHUNKS_PER_EPOCH ``` -### `get_bit` +### `legendre_bit` + +Returns the Legendre symbol `(a/q)` normalizes as a bit (i.e. `((a/q) + 1) // 2`). In a production implementation, a well-optimized library (e.g. GMP) should be used for this. ```python -def get_bit(serialization: bytes, i: uint64) -> int: - """ - Extract the bit in ``serialization`` at position ``i``. - """ - return (serialization[i // 8] >> (i % 8)) % 2 +def legendre_bit(a: int, q: int) -> int: + if a >= q: + return legendre_bit(a % q, q) + if a == 0: + return 0 + assert(q > a > 0 and q % 2 == 1) + t = 1 + n = q + while a != 0: + while a % 2 == 0: + a //= 2 + r = n % 8 + if r == 3 or r == 5: + t = -t + a, n = n, a + if a % 4 == n % 4 == 3: + t = -t + a %= n + if n == 1: + return (t + 1) // 2 + else: + return 0 +``` + +### ```custody_subchunkify``` + +Given one proof of custody chunk, returns the proof of custody subchunks of the correct sizes. + +```python +def custody_subchunkify(bytez: bytes) -> list: + bytez += b'\x00' * (-len(bytez) % BYTES_PER_CUSTODY_SUBCHUNK) + return [bytez[i:i + BYTES_PER_CUSTODY_SUBCHUNK] + for i in range(0, len(bytez), BYTES_PER_CUSTODY_SUBCHUNK)] ``` ### `get_custody_chunk_bit` ```python def get_custody_chunk_bit(key: BLSSignature, chunk: bytes) -> bool: - # TODO: Replace with something MPC-friendly, e.g. 
the Legendre symbol - return bool(get_bit(hash(key + chunk), 0)) + full_G2_element = bls_signature_to_G2(key) + s = full_G2_element[0].coeffs + bits = [legendre_bit((i + 1) * s[i % 2] + int.from_bytes(subchunk, "little"), BLS12_381_Q) + for i, subchunk in enumerate(custody_subchunkify(chunk))] + + return bool(sum(bits) % 2) ``` ### `get_chunk_bits_root` ```python -def get_chunk_bits_root(chunk_bits: bytes) -> Hash: - aggregated_bits = bytearray([0] * 32) - for i in range(0, len(chunk_bits), 32): - for j in range(32): - aggregated_bits[j] ^= chunk_bits[i + j] - return hash(aggregated_bits) +def get_chunk_bits_root(chunk_bits: Bitlist[MAX_CUSTODY_CHUNKS]) -> bit: + aggregated_bits = 0 + for i, b in enumerate(chunk_bits): + aggregated_bits += 2**i * b + return legendre_bit(aggregated_bits, BLS12_381_Q) ``` ### `get_randao_epoch_for_custody_period` @@ -312,10 +375,10 @@ def get_randao_epoch_for_custody_period(period: uint64, validator_index: Validat return Epoch(next_period_start + CUSTODY_PERIOD_TO_RANDAO_PADDING) ``` -### `get_reveal_period` +### `get_custody_period_for_validator` ```python -def get_reveal_period(state: BeaconState, validator_index: ValidatorIndex, epoch: Epoch=None) -> int: +def get_custody_period_for_validator(state: BeaconState, validator_index: ValidatorIndex, epoch: Epoch=None) -> int: ''' Return the reveal period for a given validator. ''' @@ -354,9 +417,9 @@ def process_custody_key_reveal(state: BeaconState, reveal: CustodyKeyReveal) -> Note that this function mutates ``state``. """ revealer = state.validators[reveal.revealer_index] - epoch_to_sign = get_randao_epoch_for_custody_period(revealer.next_custody_reveal_period, reveal.revealed_index) + epoch_to_sign = get_randao_epoch_for_custody_period(revealer.next_custody_secret_to_reveal, reveal.revealer_index) - assert revealer.next_custody_reveal_period < get_reveal_period(state, reveal.revealed_index) + assert revealer.next_custody_secret_to_reveal < get_custody_period_for_validator(state, reveal.revealer_index) # Revealed validator is active or exited, but not withdrawn assert is_slashable_validator(revealer, get_current_epoch(state)) @@ -374,15 +437,19 @@ def process_custody_key_reveal(state: BeaconState, reveal: CustodyKeyReveal) -> ) # Decrement max reveal lateness if response is timely - if revealer.next_custody_reveal_period == get_reveal_period(state, reveal.revealer_index) - 2: - revealer.max_reveal_lateness -= MAX_REVEAL_LATENESS_DECREMENT - revealer.max_reveal_lateness = max( - revealer.max_reveal_lateness, - get_reveal_period(state, reveal.revealed_index) - revealer.next_custody_reveal_period - ) + if epoch_to_sign + EPOCHS_PER_CUSTODY_PERIOD >= get_current_epoch(state): + if revealer.max_reveal_lateness >= MAX_REVEAL_LATENESS_DECREMENT: + revealer.max_reveal_lateness -= MAX_REVEAL_LATENESS_DECREMENT + else: + revealer.max_reveal_lateness = 0 + else: + revealer.max_reveal_lateness = max( + revealer.max_reveal_lateness, + get_current_epoch(state) - epoch_to_sign - EPOCHS_PER_CUSTODY_PERIOD + ) # Process reveal - revealer.next_custody_reveal_period += 1 + revealer.next_custody_secret_to_reveal += 1 # Reward Block Preposer proposer_index = get_beacon_proposer_index(state) @@ -520,7 +587,7 @@ For each `challenge` in `block.body.custody_bit_challenges`, run the following f ```python def process_bit_challenge(state: BeaconState, challenge: CustodyBitChallenge) -> None: attestation = challenge.attestation - epoch = compute_epoch_of_slot(attestation.data.slot) + epoch = attestation.data.target.epoch shard = 
attestation.data.crosslink.shard # Verify challenge signature @@ -533,7 +600,10 @@ def process_bit_challenge(state: BeaconState, challenge: CustodyBitChallenge) -> assert is_valid_indexed_attestation(state, get_indexed_attestation(state, attestation)) # Verify attestation is eligible for challenging responder = state.validators[challenge.responder_index] - assert epoch + responder.max_reveal_lateness <= get_reveal_period(state, challenge.responder_index) + assert get_current_epoch(state) <= get_randao_epoch_for_custody_period( + get_custody_period_for_validator(state, challenge.responder_index, epoch), + challenge.responder_index + ) + 2 * EPOCHS_PER_CUSTODY_PERIOD + responder.max_reveal_lateness # Verify the responder participated in the attestation attesters = get_attesting_indices(state, attestation.data, attestation.aggregation_bits) @@ -543,17 +613,18 @@ def process_bit_challenge(state: BeaconState, challenge: CustodyBitChallenge) -> assert record.challenger_index != challenge.challenger_index # Verify the responder custody key epoch_to_sign = get_randao_epoch_for_custody_period( - get_reveal_period(state, challenge.responder_index, epoch), + get_custody_period_for_validator(state, challenge.responder_index, epoch), challenge.responder_index, ) domain = get_domain(state, DOMAIN_RANDAO, epoch_to_sign) assert bls_verify(responder.pubkey, hash_tree_root(epoch_to_sign), challenge.responder_key, domain) # Verify the chunk count chunk_count = get_custody_chunk_count(attestation.data.crosslink) - # Verify the first bit of the hash of the chunk bits does not equal the custody bit + assert chunk_count == len(challenge.chunk_bits) + # Verify custody bit is incorrect committee = get_crosslink_committee(state, epoch, shard) custody_bit = attestation.custody_bits[committee.index(challenge.responder_index)] - assert custody_bit != get_bit(get_chunk_bits_root(challenge.chunk_bits), 0) + assert custody_bit != get_chunk_bits_root(challenge.chunk_bits) # Add new bit challenge record new_record = CustodyBitChallengeRecord( challenge_index=state.custody_challenge_index, @@ -636,16 +707,17 @@ def process_bit_challenge_response(state: BeaconState, root=challenge.data_root, ) # Verify the chunk bit leaf matches the challenge data - assert is_valid_merkle_branch( - leaf=response.chunk_bits_leaf, + assert is_valid_merkle_branch_with_mixin( + leaf=hash_tree_root(response.chunk_bits_leaf), branch=response.chunk_bits_branch, - depth=ceillog2(challenge.chunk_count) >> 8, + depth=ceillog2(MAX_CUSTODY_CHUNKS // 256), index=response.chunk_index // 256, - root=challenge.chunk_bits_merkle_root + root=challenge.chunk_bits_merkle_root, + mixin=challenge.chunk_count, ) # Verify the chunk bit does not match the challenge chunk bit assert (get_custody_chunk_bit(challenge.responder_key, response.chunk) - != get_bit(challenge.chunk_bits_leaf, response.chunk_index % 256)) + != response.chunk_bits_leaf[response.chunk_index % 256]) # Clear the challenge records = state.custody_bit_challenge_records records[records.index(challenge)] = CustodyBitChallengeRecord() @@ -665,8 +737,8 @@ Run `process_reveal_deadlines(state)` immediately after `process_registry_update # end insert @process_reveal_deadlines def process_reveal_deadlines(state: BeaconState) -> None: for index, validator in enumerate(state.validators): - deadline = validator.next_custody_reveal_period + (CUSTODY_RESPONSE_DEADLINE // EPOCHS_PER_CUSTODY_PERIOD) - if get_reveal_period(state, ValidatorIndex(index)) > deadline: + deadline = 
validator.next_custody_secret_to_reveal + (CUSTODY_RESPONSE_DEADLINE // EPOCHS_PER_CUSTODY_PERIOD) + if get_custody_period_for_validator(state, ValidatorIndex(index)) > deadline: slash_validator(state, ValidatorIndex(index)) ``` diff --git a/test_libs/pyspec/eth2spec/test/fork_choice/test_on_attestation.py b/test_libs/pyspec/eth2spec/test/fork_choice/test_on_attestation.py index 400675888..ee1c04219 100644 --- a/test_libs/pyspec/eth2spec/test/fork_choice/test_on_attestation.py +++ b/test_libs/pyspec/eth2spec/test/fork_choice/test_on_attestation.py @@ -1,4 +1,4 @@ -from eth2spec.test.context import with_all_phases, with_state, bls_switch +from eth2spec.test.context import with_all_phases, with_state, bls_switch, with_phases from eth2spec.test.helpers.block import build_empty_block_for_next_slot from eth2spec.test.helpers.attestations import get_valid_attestation @@ -103,7 +103,7 @@ def test_on_attestation_same_slot(spec, state): run_on_attestation(spec, state, store, attestation, False) -@with_all_phases +@with_phases(['phase0']) @with_state @bls_switch def test_on_attestation_invalid_attestation(spec, state): diff --git a/test_libs/pyspec/eth2spec/test/helpers/custody.py b/test_libs/pyspec/eth2spec/test/helpers/custody.py index 36f23ad1c..4b7c8c97b 100644 --- a/test_libs/pyspec/eth2spec/test/helpers/custody.py +++ b/test_libs/pyspec/eth2spec/test/helpers/custody.py @@ -1,6 +1,11 @@ from eth2spec.test.helpers.keys import privkeys from eth2spec.utils.bls import bls_sign, bls_aggregate_signatures from eth2spec.utils.hash_function import hash +from eth2spec.utils.ssz.ssz_typing import Bitlist, BytesN, Bitvector +from eth2spec.utils.ssz.ssz_impl import chunkify, pack, hash_tree_root +from eth2spec.utils.merkle_minimal import get_merkle_tree, get_merkle_proof + +BYTES_PER_CHUNK = 32 def get_valid_early_derived_secret_reveal(spec, state, epoch=None): @@ -13,7 +18,7 @@ def get_valid_early_derived_secret_reveal(spec, state, epoch=None): # Generate the secret that is being revealed reveal = bls_sign( - message_hash=spec.hash_tree_root(spec.Epoch(epoch)), + message_hash=hash_tree_root(spec.Epoch(epoch)), privkey=privkeys[revealed_index], domain=spec.get_domain( state=state, @@ -42,3 +47,128 @@ def get_valid_early_derived_secret_reveal(spec, state, epoch=None): masker_index=masker_index, mask=mask, ) + + +def get_valid_custody_key_reveal(spec, state, period=None): + current_epoch = spec.get_current_epoch(state) + revealer_index = spec.get_active_validator_indices(state, current_epoch)[0] + revealer = state.validators[revealer_index] + + if period is None: + period = revealer.next_custody_secret_to_reveal + + epoch_to_sign = spec.get_randao_epoch_for_custody_period(period, revealer_index) + + # Generate the secret that is being revealed + reveal = bls_sign( + message_hash=hash_tree_root(spec.Epoch(epoch_to_sign)), + privkey=privkeys[revealer_index], + domain=spec.get_domain( + state=state, + domain_type=spec.DOMAIN_RANDAO, + message_epoch=epoch_to_sign, + ), + ) + return spec.CustodyKeyReveal( + revealer_index=revealer_index, + reveal=reveal, + ) + + +def bitlist_from_int(max_len, num_bits, n): + return Bitlist[max_len](*[(n >> i) & 0b1 for i in range(num_bits)]) + + +def get_valid_bit_challenge(spec, state, attestation, invalid_custody_bit=False): + crosslink_committee = spec.get_crosslink_committee( + state, + attestation.data.target.epoch, + attestation.data.crosslink.shard, + ) + responder_index = crosslink_committee[0] + challenger_index = crosslink_committee[-1] + + epoch = 
spec.get_randao_epoch_for_custody_period(attestation.data.target.epoch, + responder_index) + + # Generate the responder key + responder_key = bls_sign( + message_hash=hash_tree_root(spec.Epoch(epoch)), + privkey=privkeys[responder_index], + domain=spec.get_domain( + state=state, + domain_type=spec.DOMAIN_RANDAO, + message_epoch=epoch, + ), + ) + + chunk_count = spec.get_custody_chunk_count(attestation.data.crosslink) + + chunk_bits = bitlist_from_int(spec.MAX_CUSTODY_CHUNKS, chunk_count, 0) + + n = 0 + while spec.get_chunk_bits_root(chunk_bits) == attestation.custody_bits[0] ^ invalid_custody_bit: + chunk_bits = bitlist_from_int(spec.MAX_CUSTODY_CHUNKS, chunk_count, n) + n += 1 + + return spec.CustodyBitChallenge( + responder_index=responder_index, + attestation=attestation, + challenger_index=challenger_index, + responder_key=responder_key, + chunk_bits=chunk_bits, + ) + + +def custody_chunkify(spec, x): + chunks = [bytes(x[i:i + spec.BYTES_PER_CUSTODY_CHUNK]) for i in range(0, len(x), spec.BYTES_PER_CUSTODY_CHUNK)] + chunks[-1] = chunks[-1].ljust(spec.BYTES_PER_CUSTODY_CHUNK, b"\0") + return chunks + + +def get_valid_custody_response(spec, state, bit_challenge, custody_data, challenge_index, invalid_chunk_bit=False): + chunks = custody_chunkify(spec, custody_data) + + chunk_index = len(chunks) - 1 + chunk_bit = spec.get_custody_chunk_bit(bit_challenge.responder_key, chunks[chunk_index]) + + while chunk_bit == bit_challenge.chunk_bits[chunk_index] ^ invalid_chunk_bit: + chunk_index -= 1 + chunk_bit = spec.get_custody_chunk_bit(bit_challenge.responder_key, chunks[chunk_index]) + + chunks_hash_tree_roots = [hash_tree_root(BytesN[spec.BYTES_PER_CUSTODY_CHUNK](chunk)) for chunk in chunks] + chunks_hash_tree_roots += [ + hash_tree_root(BytesN[spec.BYTES_PER_CUSTODY_CHUNK](b"\0" * spec.BYTES_PER_CUSTODY_CHUNK)) + for i in range(2 ** spec.ceillog2(len(chunks)) - len(chunks))] + data_tree = get_merkle_tree(chunks_hash_tree_roots) + + data_branch = get_merkle_proof(data_tree, chunk_index) + + bitlist_chunk_index = chunk_index // BYTES_PER_CHUNK + bitlist_chunks = chunkify(pack(bit_challenge.chunk_bits)) + bitlist_tree = get_merkle_tree(bitlist_chunks, pad_to=spec.MAX_CUSTODY_CHUNKS // 256) + bitlist_chunk_branch = get_merkle_proof(bitlist_tree, chunk_index // 256) + \ + [len(bit_challenge.chunk_bits).to_bytes(32, "little")] + + bitlist_chunk_index = chunk_index // 256 + + chunk_bits_leaf = Bitvector[256](bit_challenge.chunk_bits[bitlist_chunk_index * 256: + (bitlist_chunk_index + 1) * 256]) + + return spec.CustodyResponse( + challenge_index=challenge_index, + chunk_index=chunk_index, + chunk=BytesN[spec.BYTES_PER_CUSTODY_CHUNK](chunks[chunk_index]), + data_branch=data_branch, + chunk_bits_branch=bitlist_chunk_branch, + chunk_bits_leaf=chunk_bits_leaf, + ) + + +def get_custody_test_vector(bytelength): + ints = bytelength // 4 + return b"".join(i.to_bytes(4, "little") for i in range(ints)) + + +def get_custody_merkle_root(data): + return get_merkle_tree(chunkify(data))[-1][0] diff --git a/test_libs/pyspec/eth2spec/test/helpers/deposits.py b/test_libs/pyspec/eth2spec/test/helpers/deposits.py index 8dc6b3b58..89574c977 100644 --- a/test_libs/pyspec/eth2spec/test/helpers/deposits.py +++ b/test_libs/pyspec/eth2spec/test/helpers/deposits.py @@ -47,7 +47,7 @@ def build_deposit(spec, deposit_data_list.append(deposit_data) root = hash_tree_root(List[spec.DepositData, 2**spec.DEPOSIT_CONTRACT_TREE_DEPTH](*deposit_data_list)) tree = calc_merkle_tree_from_leaves(tuple([d.hash_tree_root() for d in 
deposit_data_list])) - proof = list(get_merkle_proof(tree, item_index=index)) + [(index + 1).to_bytes(32, 'little')] + proof = list(get_merkle_proof(tree, item_index=index, tree_len=32)) + [(index + 1).to_bytes(32, 'little')] leaf = deposit_data.hash_tree_root() assert spec.is_valid_merkle_branch(leaf, proof, spec.DEPOSIT_CONTRACT_TREE_DEPTH + 1, index, root) deposit = spec.Deposit(proof=proof, data=deposit_data) diff --git a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py index ab46a0d8c..84cb95ba0 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_process_attestation.py @@ -363,7 +363,7 @@ def test_inconsistent_bits(spec, state): attestation = get_valid_attestation(spec, state) state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY - custody_bits = attestation.aggregation_bits[:] + custody_bits = attestation.custody_bits[:] custody_bits.append(False) attestation.custody_bits = custody_bits diff --git a/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_bit_challenge.py b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_bit_challenge.py new file mode 100644 index 000000000..e4880555a --- /dev/null +++ b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_bit_challenge.py @@ -0,0 +1,347 @@ +from eth2spec.test.helpers.custody import ( + get_valid_bit_challenge, + get_valid_custody_response, + get_custody_test_vector, + get_custody_merkle_root +) +from eth2spec.test.helpers.attestations import ( + get_valid_attestation, +) +from eth2spec.utils.ssz.ssz_impl import hash_tree_root +from eth2spec.test.helpers.state import next_epoch, get_balance +from eth2spec.test.helpers.block import apply_empty_block +from eth2spec.test.context import ( + with_all_phases_except, + spec_state_test, + expect_assertion_error, +) +from eth2spec.test.phase_0.block_processing.test_process_attestation import run_attestation_processing + + +def run_bit_challenge_processing(spec, state, custody_bit_challenge, valid=True): + """ + Run ``process_bit_challenge``, yielding: + - pre-state ('pre') + - CustodyBitChallenge ('custody_bit_challenge') + - post-state ('post'). + If ``valid == False``, run expecting ``AssertionError`` + """ + yield 'pre', state + yield 'custody_bit_challenge', custody_bit_challenge + + if not valid: + expect_assertion_error(lambda: spec.process_bit_challenge(state, custody_bit_challenge)) + yield 'post', None + return + + spec.process_bit_challenge(state, custody_bit_challenge) + + assert state.custody_bit_challenge_records[state.custody_challenge_index - 1].chunk_bits_merkle_root == \ + hash_tree_root(custody_bit_challenge.chunk_bits) + assert state.custody_bit_challenge_records[state.custody_challenge_index - 1].challenger_index == \ + custody_bit_challenge.challenger_index + assert state.custody_bit_challenge_records[state.custody_challenge_index - 1].responder_index == \ + custody_bit_challenge.responder_index + + yield 'post', state + + +def run_custody_response_processing(spec, state, custody_response, valid=True): + """ + Run ``process_bit_challenge_response``, yielding: + - pre-state ('pre') + - CustodyResponse ('custody_response') + - post-state ('post'). 
+ If ``valid == False``, run expecting ``AssertionError`` + """ + yield 'pre', state + yield 'custody_response', custody_response + + if not valid: + expect_assertion_error(lambda: spec.process_custody_response(state, custody_response)) + yield 'post', None + return + + # TODO: Add capability to also process chunk challenges, not only bit challenges + challenge = state.custody_bit_challenge_records[custody_response.challenge_index] + pre_slashed_balance = get_balance(state, challenge.challenger_index) + + spec.process_custody_response(state, custody_response) + + slashed_validator = state.validators[challenge.challenger_index] + + assert slashed_validator.slashed + assert slashed_validator.exit_epoch < spec.FAR_FUTURE_EPOCH + assert slashed_validator.withdrawable_epoch < spec.FAR_FUTURE_EPOCH + + assert get_balance(state, challenge.challenger_index) < pre_slashed_balance + yield 'post', state + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_challenge_appended(spec, state): + state.slot = spec.SLOTS_PER_EPOCH + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + _, _, _ = run_attestation_processing(spec, state, attestation) + + state.slot += spec.SLOTS_PER_EPOCH * spec.EPOCHS_PER_CUSTODY_PERIOD + + challenge = get_valid_bit_challenge(spec, state, attestation) + + yield from run_bit_challenge_processing(spec, state, challenge) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_multiple_epochs_custody(spec, state): + state.slot = spec.SLOTS_PER_EPOCH * 3 + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + _, _, _ = run_attestation_processing(spec, state, attestation) + + state.slot += spec.SLOTS_PER_EPOCH * (spec.EPOCHS_PER_CUSTODY_PERIOD - 1) + + challenge = get_valid_bit_challenge(spec, state, attestation) + + yield from run_bit_challenge_processing(spec, state, challenge) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_many_epochs_custody(spec, state): + state.slot = spec.SLOTS_PER_EPOCH * 100 + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + _, _, _ = run_attestation_processing(spec, state, attestation) + + state.slot += spec.SLOTS_PER_EPOCH * (spec.EPOCHS_PER_CUSTODY_PERIOD - 1) + + challenge = get_valid_bit_challenge(spec, state, attestation) + + yield from run_bit_challenge_processing(spec, state, challenge) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_off_chain_attestation(spec, state): + state.slot = spec.SLOTS_PER_EPOCH + attestation = get_valid_attestation(spec, state, signed=True) + + 
test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + state.slot += spec.SLOTS_PER_EPOCH * spec.EPOCHS_PER_CUSTODY_PERIOD + + challenge = get_valid_bit_challenge(spec, state, attestation) + + yield from run_bit_challenge_processing(spec, state, challenge) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_invalid_custody_bit_challenge(spec, state): + state.slot = spec.SLOTS_PER_EPOCH + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + _, _, _ = run_attestation_processing(spec, state, attestation) + + state.slot += spec.SLOTS_PER_EPOCH * spec.EPOCHS_PER_CUSTODY_PERIOD + + challenge = get_valid_bit_challenge(spec, state, attestation, invalid_custody_bit=True) + + yield from run_bit_challenge_processing(spec, state, challenge, valid=False) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_max_reveal_lateness_1(spec, state): + next_epoch(spec, state) + apply_empty_block(spec, state) + + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + next_epoch(spec, state) + apply_empty_block(spec, state) + + _, _, _ = run_attestation_processing(spec, state, attestation) + + challenge = get_valid_bit_challenge(spec, state, attestation) + + responder_index = challenge.responder_index + + state.validators[responder_index].max_reveal_lateness = 3 + + for i in range(spec.get_randao_epoch_for_custody_period( + spec.get_custody_period_for_validator(state, responder_index), + responder_index + ) + 2 * spec.EPOCHS_PER_CUSTODY_PERIOD + state.validators[responder_index].max_reveal_lateness - 2): + next_epoch(spec, state) + apply_empty_block(spec, state) + + yield from run_bit_challenge_processing(spec, state, challenge) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_max_reveal_lateness_2(spec, state): + next_epoch(spec, state) + apply_empty_block(spec, state) + + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + next_epoch(spec, state) + apply_empty_block(spec, state) + + _, _, _ = run_attestation_processing(spec, state, attestation) + + challenge = get_valid_bit_challenge(spec, state, attestation) + + responder_index = challenge.responder_index + + state.validators[responder_index].max_reveal_lateness = 3 + + for i in range(spec.get_randao_epoch_for_custody_period( + spec.get_custody_period_for_validator(state, responder_index), + responder_index + ) + 2 * spec.EPOCHS_PER_CUSTODY_PERIOD + 
state.validators[responder_index].max_reveal_lateness - 1): + next_epoch(spec, state) + apply_empty_block(spec, state) + + yield from run_bit_challenge_processing(spec, state, challenge, False) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_custody_response(spec, state): + state.slot = spec.SLOTS_PER_EPOCH + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + _, _, _ = run_attestation_processing(spec, state, attestation) + + state.slot += spec.SLOTS_PER_EPOCH * spec.EPOCHS_PER_CUSTODY_PERIOD + + challenge = get_valid_bit_challenge(spec, state, attestation) + + _, _, _ = run_bit_challenge_processing(spec, state, challenge) + + bit_challenge_index = state.custody_challenge_index - 1 + + custody_response = get_valid_custody_response(spec, state, challenge, test_vector, bit_challenge_index) + + yield from run_custody_response_processing(spec, state, custody_response) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_custody_response_multiple_epochs(spec, state): + state.slot = spec.SLOTS_PER_EPOCH * 3 + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + _, _, _ = run_attestation_processing(spec, state, attestation) + + state.slot += spec.SLOTS_PER_EPOCH * spec.EPOCHS_PER_CUSTODY_PERIOD + + challenge = get_valid_bit_challenge(spec, state, attestation) + + _, _, _ = run_bit_challenge_processing(spec, state, challenge) + + bit_challenge_index = state.custody_challenge_index - 1 + + custody_response = get_valid_custody_response(spec, state, challenge, test_vector, bit_challenge_index) + + yield from run_custody_response_processing(spec, state, custody_response) + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_custody_response_many_epochs(spec, state): + state.slot = spec.SLOTS_PER_EPOCH * 100 + attestation = get_valid_attestation(spec, state, signed=True) + + test_vector = get_custody_test_vector( + spec.get_custody_chunk_count(attestation.data.crosslink) * spec.BYTES_PER_CUSTODY_CHUNK) + shard_root = get_custody_merkle_root(test_vector) + attestation.data.crosslink.data_root = shard_root + attestation.custody_bits[0] = 0 + + state.slot += spec.MIN_ATTESTATION_INCLUSION_DELAY + + _, _, _ = run_attestation_processing(spec, state, attestation) + + state.slot += spec.SLOTS_PER_EPOCH * spec.EPOCHS_PER_CUSTODY_PERIOD + + challenge = get_valid_bit_challenge(spec, state, attestation) + + _, _, _ = run_bit_challenge_processing(spec, state, challenge) + + bit_challenge_index = state.custody_challenge_index - 1 + + custody_response = get_valid_custody_response(spec, state, challenge, test_vector, bit_challenge_index) + + yield from run_custody_response_processing(spec, state, custody_response) diff --git a/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_custody_key_reveal.py 
b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_custody_key_reveal.py new file mode 100644 index 000000000..f8860cf87 --- /dev/null +++ b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_custody_key_reveal.py @@ -0,0 +1,118 @@ +from eth2spec.test.helpers.custody import get_valid_custody_key_reveal +from eth2spec.test.context import ( + with_all_phases_except, + spec_state_test, + expect_assertion_error, + always_bls, +) + + +def run_custody_key_reveal_processing(spec, state, custody_key_reveal, valid=True): + """ + Run ``process_custody_key_reveal``, yielding: + - pre-state ('pre') + - custody_key_reveal ('custody_key_reveal') + - post-state ('post'). + If ``valid == False``, run expecting ``AssertionError`` + """ + yield 'pre', state + yield 'custody_key_reveal', custody_key_reveal + + if not valid: + expect_assertion_error(lambda: spec.process_custody_key_reveal(state, custody_key_reveal)) + yield 'post', None + return + + revealer_index = custody_key_reveal.revealer_index + + pre_next_custody_secret_to_reveal = \ + state.validators[revealer_index].next_custody_secret_to_reveal + pre_reveal_lateness = state.validators[revealer_index].max_reveal_lateness + + spec.process_custody_key_reveal(state, custody_key_reveal) + + post_next_custody_secret_to_reveal = \ + state.validators[revealer_index].next_custody_secret_to_reveal + post_reveal_lateness = state.validators[revealer_index].max_reveal_lateness + + assert post_next_custody_secret_to_reveal == pre_next_custody_secret_to_reveal + 1 + + if spec.get_current_epoch(state) > spec.get_randao_epoch_for_custody_period( + pre_next_custody_secret_to_reveal, + revealer_index + ) + spec.EPOCHS_PER_CUSTODY_PERIOD: + assert post_reveal_lateness > 0 + if pre_reveal_lateness == 0: + assert post_reveal_lateness == spec.get_current_epoch(state) - spec.get_randao_epoch_for_custody_period( + pre_next_custody_secret_to_reveal, + revealer_index + ) - spec.EPOCHS_PER_CUSTODY_PERIOD + else: + if pre_reveal_lateness > 0: + assert post_reveal_lateness < pre_reveal_lateness + + yield 'post', state + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_success(spec, state): + state.slot += spec.EPOCHS_PER_CUSTODY_PERIOD * spec.SLOTS_PER_EPOCH + custody_key_reveal = get_valid_custody_key_reveal(spec, state) + + yield from run_custody_key_reveal_processing(spec, state, custody_key_reveal) + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_reveal_too_early(spec, state): + custody_key_reveal = get_valid_custody_key_reveal(spec, state) + + yield from run_custody_key_reveal_processing(spec, state, custody_key_reveal, False) + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_wrong_period(spec, state): + custody_key_reveal = get_valid_custody_key_reveal(spec, state, period=5) + + yield from run_custody_key_reveal_processing(spec, state, custody_key_reveal, False) + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_late_reveal(spec, state): + state.slot += spec.EPOCHS_PER_CUSTODY_PERIOD * spec.SLOTS_PER_EPOCH * 3 + 150 + custody_key_reveal = get_valid_custody_key_reveal(spec, state) + + yield from run_custody_key_reveal_processing(spec, state, custody_key_reveal) + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_double_reveal(spec, state): + state.slot += spec.EPOCHS_PER_CUSTODY_PERIOD * spec.SLOTS_PER_EPOCH * 2 + custody_key_reveal = get_valid_custody_key_reveal(spec, state) + + _, 
_, _ = run_custody_key_reveal_processing(spec, state, custody_key_reveal) + + yield from run_custody_key_reveal_processing(spec, state, custody_key_reveal, False) + + +@with_all_phases_except(['phase0']) +@always_bls +@spec_state_test +def test_max_decrement(spec, state): + state.slot += spec.EPOCHS_PER_CUSTODY_PERIOD * spec.SLOTS_PER_EPOCH * 3 + 150 + custody_key_reveal = get_valid_custody_key_reveal(spec, state) + + _, _, _ = run_custody_key_reveal_processing(spec, state, custody_key_reveal) + + custody_key_reveal2 = get_valid_custody_key_reveal(spec, state) + + yield from run_custody_key_reveal_processing(spec, state, custody_key_reveal2) diff --git a/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py index 831ad35a5..63f4721b9 100644 --- a/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py +++ b/test_libs/pyspec/eth2spec/test/phase_1/block_processing/test_process_early_derived_secret_reveal.py @@ -98,25 +98,21 @@ def test_reveal_with_custody_padding_minus_one(spec, state): @never_bls @spec_state_test def test_double_reveal(spec, state): + epoch = spec.get_current_epoch(state) + spec.RANDAO_PENALTY_EPOCHS randao_key_reveal1 = get_valid_early_derived_secret_reveal( spec, state, - spec.get_current_epoch(state) + spec.RANDAO_PENALTY_EPOCHS + 1, + epoch, ) - res = dict(run_early_derived_secret_reveal_processing(spec, state, randao_key_reveal1)) - pre_state = res['pre'] - yield 'pre', pre_state - intermediate_state = res['post'] + _, _, _ = dict(run_early_derived_secret_reveal_processing(spec, state, randao_key_reveal1)) randao_key_reveal2 = get_valid_early_derived_secret_reveal( spec, - intermediate_state, - spec.get_current_epoch(pre_state) + spec.RANDAO_PENALTY_EPOCHS + 1, + state, + epoch, ) - res = dict(run_early_derived_secret_reveal_processing(spec, intermediate_state, randao_key_reveal2, False)) - post_state = res['post'] - yield 'randao_key_reveal', [randao_key_reveal1, randao_key_reveal2] - yield 'post', post_state + + yield from run_early_derived_secret_reveal_processing(spec, state, randao_key_reveal2, False) @with_all_phases_except(['phase0']) diff --git a/test_libs/pyspec/eth2spec/utils/bls.py b/test_libs/pyspec/eth2spec/utils/bls.py index d8a9ab5be..f40e5ab04 100644 --- a/test_libs/pyspec/eth2spec/utils/bls.py +++ b/test_libs/pyspec/eth2spec/utils/bls.py @@ -5,6 +5,7 @@ bls_active = True STUB_SIGNATURE = b'\x11' * 96 STUB_PUBKEY = b'\x22' * 48 +STUB_COORDINATES = bls.api.signature_to_G2(bls.sign(b"", 0, b"\0" * 8)) def only_with_bls(alt_return=None): @@ -47,3 +48,8 @@ def bls_aggregate_signatures(signatures): def bls_sign(message_hash, privkey, domain): return bls.sign(message_hash=message_hash, privkey=privkey, domain=domain) + + +@only_with_bls(alt_return=STUB_COORDINATES) +def bls_signature_to_G2(signature): + return bls.api.signature_to_G2(signature) diff --git a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py index 9d7138d7d..aae7ff5c0 100644 --- a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py +++ b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py @@ -20,6 +20,13 @@ def calc_merkle_tree_from_leaves(values, layer_count=32): return tree +def get_merkle_tree(values, pad_to=None): + layer_count = (len(values) - 1).bit_length() if pad_to is None else (pad_to - 1).bit_length() + if len(values) == 0: + return 
zerohashes[layer_count]
+    return calc_merkle_tree_from_leaves(values, layer_count)
+
+
 def get_merkle_root(values, pad_to=1):
     if pad_to == 0:
         return zerohashes[0]
@@ -29,9 +36,9 @@ def get_merkle_root(values, pad_to=1):
     return calc_merkle_tree_from_leaves(values, layer_count)[-1][0]
 
 
-def get_merkle_proof(tree, item_index):
+def get_merkle_proof(tree, item_index, tree_len=None):
     proof = []
-    for i in range(32):
+    for i in range(tree_len if tree_len is not None else len(tree)):
         subindex = (item_index // 2**i) ^ 1
         proof.append(tree[i][subindex] if subindex < len(tree[i]) else zerohashes[i])
     return proof

From e23b37842ea21f0ab0e6caa5141a59846ecbd62d Mon Sep 17 00:00:00 2001
From: Age Manning
Date: Tue, 13 Aug 2019 08:09:44 +1000
Subject: [PATCH 107/130] Update specs/networking/p2p-interface.md

Co-Authored-By: Danny Ryan
---
 specs/networking/p2p-interface.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md
index 84ad45022..0b2e3562a 100644
--- a/specs/networking/p2p-interface.md
+++ b/specs/networking/p2p-interface.md
@@ -301,7 +301,7 @@ Here `result` represents the 1-byte response code.
 
 The token of the negotiated protocol ID specifies the type of encoding to be used for the req/resp interaction. Two values are possible at this time:
 
-- `ssz`: the contents are [SSZ](https://github.com/ethereum/eth2.0-specs/blob/192442be51a8a6907d6401dffbf5c73cb220b760/specs/networking/libp2p-standardization.md#ssz-encoding) encoded. This encoding type MUST be supported by all clients.
+- `ssz`: the contents are [SSZ](#ssz-encoding) encoded. This encoding type MUST be supported by all clients. For objects containing a single field, only the field is SSZ-encoded not a container with a single field. For example, the `BeaconBlocks` response would be an SSZ-encoded list of `BeaconBlock`s. All SSZ-Lists in the Req/Resp domain will have a max-list size of `SSZ_MAX_LIST_SIZE`.
 - `ssz_snappy`: the contents are SSZ encoded, and subsequently compressed with [Snappy](https://github.com/google/snappy). MAY be supported in the interoperability testnet; and MUST be supported in mainnet.
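For illustration, an `ssz_snappy` round trip could look like the sketch below. It assumes the `python-snappy` bindings and leaves SSZ serialization and deserialization to the caller; the `encode_ssz_snappy`/`decode_ssz_snappy` helper names are illustrative only, and the text above does not fix whether Snappy block or frame format is meant.

```python
import snappy  # assumed dependency: python-snappy bindings


def encode_ssz_snappy(ssz_bytes: bytes) -> bytes:
    # The caller SSZ-serializes the object first; the result is then Snappy-compressed.
    return snappy.compress(ssz_bytes)


def decode_ssz_snappy(payload: bytes) -> bytes:
    # Decompress; the caller then SSZ-deserializes into the expected response type.
    return snappy.decompress(payload)
```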
From c224af999e4895983d0257403fb9d85dd679b6b7 Mon Sep 17 00:00:00 2001
From: Danny Ryan
Date: Mon, 12 Aug 2019 19:05:16 -0600
Subject: [PATCH 108/130] MAX_PERSISTENT_COMMITTEE_SIZE ->
 TARGET_PERSISTENT_COMMITTEE_SIZE

---
 specs/core/1_shard-data-chains.md        | 18 +++++++++---------
 .../test/helpers/phase1/shard_block.py   |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/specs/core/1_shard-data-chains.md b/specs/core/1_shard-data-chains.md
index 079c0b4b7..8e1532f17 100644
--- a/specs/core/1_shard-data-chains.md
+++ b/specs/core/1_shard-data-chains.md
@@ -66,7 +66,7 @@ We define the following Python custom types for type hinting and readability:
 | Name | Value |
 | - | - |
 | `SHARD_SLOTS_PER_BEACON_SLOT` | `2**1` (= 2) |
-| `MAX_PERSISTENT_COMMITTEE_SIZE` | `2**7` (= 128) |
+| `TARGET_PERSISTENT_COMMITTEE_SIZE` | `2**7` (= 128) |
 | `SHARD_HEADER_SIZE` | `2**9` (= 512) |
 | `SHARD_BLOCK_SIZE_TARGET` | `2**14` (= 16,384) |
 | `SHARD_BLOCK_SIZE_LIMIT` | `2**16` (= 65,536) |
@@ -151,7 +151,7 @@ class ShardBlockCore(Container):
     data_root: Hash
     state_root: Hash
     total_bytes: uint64
-    attester_bitfield: Bitvector[MAX_PERSISTENT_COMMITTEE_SIZE * 2]
+    attester_bitfield: Bitvector[TARGET_PERSISTENT_COMMITTEE_SIZE * 2]
 ```
 
 ### `ExtendedShardBlockCore`
@@ -164,7 +164,7 @@ class ExtendedShardBlockCore(Container):
     data: Bytes[SHARD_BLOCK_SIZE_LIMIT - SHARD_HEADER_SIZE]
     state_root: Hash
     total_bytes: uint64
-    attester_bitfield: Bitvector[MAX_PERSISTENT_COMMITTEE_SIZE * 2]
+    attester_bitfield: Bitvector[TARGET_PERSISTENT_COMMITTEE_SIZE * 2]
 ```
 
 ### `ShardState`
@@ -172,10 +172,10 @@ class ExtendedShardBlockCore(Container):
 ```python
 class ShardState(Container):
     history_accumulator: Vector[Hash, HISTORY_ACCUMULATOR_VECTOR]
-    earlier_committee_rewards: List[uint64, MAX_PERSISTENT_COMMITTEE_SIZE]
-    later_committee_rewards: List[uint64, MAX_PERSISTENT_COMMITTEE_SIZE]
-    earlier_committee_fees: List[Gwei, MAX_PERSISTENT_COMMITTEE_SIZE]
-    later_committee_fees: List[Gwei, MAX_PERSISTENT_COMMITTEE_SIZE]
+    earlier_committee_rewards: List[uint64, TARGET_PERSISTENT_COMMITTEE_SIZE]
+    later_committee_rewards: List[uint64, TARGET_PERSISTENT_COMMITTEE_SIZE]
+    earlier_committee_fees: List[Gwei, TARGET_PERSISTENT_COMMITTEE_SIZE]
+    later_committee_fees: List[Gwei, TARGET_PERSISTENT_COMMITTEE_SIZE]
     basefee: Gwei
     slot: ShardSlot
     shard: Shard
@@ -230,7 +230,7 @@ def get_period_committee(state: BeaconState, epoch: Epoch, shard: Shard) -> Sequ
         count=SHARD_COUNT,
     )
 
-    return full_committee[:MAX_PERSISTENT_COMMITTEE_SIZE]
+    return full_committee[:TARGET_PERSISTENT_COMMITTEE_SIZE]
 ```
 
 ### `get_persistent_committee`
@@ -495,7 +495,7 @@ def shard_block_transition(state: ShardState,
             add_reward(state, beacon_state, validator_index, base_reward)
             attestations += 1
 
-    for i in range(len(attester_committee), MAX_PERSISTENT_COMMITTEE_SIZE):
+    for i in range(len(attester_committee), TARGET_PERSISTENT_COMMITTEE_SIZE):
        assert block.core.attester_bitfield[i] is False or block.core.attester_bitfield[i] == 0  # TODO: FIX Bitvector

    assert bls_verify(
diff --git a/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py b/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py
index 42e2765ea..b9c388a3f 100644
--- a/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py
+++ b/test_libs/pyspec/eth2spec/test/helpers/phase1/shard_block.py
@@ -57,7 +57,7 @@ def build_empty_shard_block(spec,
         attester_committee = spec.get_persistent_committee(beacon_state, shard_state.shard, block.core.slot)
         block.core.attester_bitfield = list(
             (True,) * len(attester_committee) +
-            (False,) * (spec.MAX_PERSISTENT_COMMITTEE_SIZE * 2 - len(attester_committee))
+            (False,) * (spec.TARGET_PERSISTENT_COMMITTEE_SIZE * 2 - len(attester_committee))
         )
         block.signatures.attestation_signature = sign_shard_attestation(
             spec,

From 84558e0c4c72dca4f347e49b22741363157038be Mon Sep 17 00:00:00 2001
From: Justin
Date: Wed, 14 Aug 2019 15:48:30 +0200
Subject: [PATCH 109/130] Add summaries and expansions to simple-serialize.md

---
 specs/simple-serialize.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md
index 119022248..ecef6ddfc 100644
--- a/specs/simple-serialize.md
+++ b/specs/simple-serialize.md
@@ -217,6 +217,12 @@ We now define Merkleization `hash_tree_root(value)` of an object `value` recursi
 
 Let `value` be a self-signed container object. The convention is that the signature (e.g. a `"bytes96"` BLS12-381 signature) be the last field of `value`. Further, the signed message for `value` is `signing_root(value) = hash_tree_root(truncate_last(value))` where `truncate_last` truncates the last element of `value`.
 
+## Summaries and expansions
+
+Let `A` be an object derived from another object `B` by replacing some of the (possibly nested) values of `B` by their `hash_tree_root`. We say `A` is a "summary" of `B`, and that `B` is an "expansion" of `A`. Notice `hash_tree_root(A) == hash_tree_root(B)`.
+
+We similarly define "summary types" and "expansion types". For example, [`BeaconBlock`](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/core/0_beacon-chain.md#beaconblock) is an expansion type of [`BeaconBlockHeader`](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/core/0_beacon-chain.md#beaconblockheader). Notice that objects expand to at most one object of a given expansion type. For example, `BeaconBlockHeader` objects uniquely expand to `BeaconBlock` objects.
+
 ## Implementations
 
 | Language | Project | Maintainer | Implementation |

From 5d8c31cfb17feff2edffbba1830437569611bc42 Mon Sep 17 00:00:00 2001
From: Danny Ryan
Date: Wed, 14 Aug 2019 11:07:03 -0600
Subject: [PATCH 110/130] Update specs/simple-serialize.md

---
 specs/simple-serialize.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md
index ecef6ddfc..50d091c07 100644
--- a/specs/simple-serialize.md
+++ b/specs/simple-serialize.md
@@ -221,7 +221,7 @@ Let `value` be a self-signed container object. The convention is that the signat
 
 Let `A` be an object derived from another object `B` by replacing some of the (possibly nested) values of `B` by their `hash_tree_root`. We say `A` is a "summary" of `B`, and that `B` is an "expansion" of `A`. Notice `hash_tree_root(A) == hash_tree_root(B)`.
 
-We similarly define "summary types" and "expansion types". For example, [`BeaconBlock`](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/core/0_beacon-chain.md#beaconblock) is an expansion type of [`BeaconBlockHeader`](https://github.com/ethereum/eth2.0-specs/blob/dev/specs/core/0_beacon-chain.md#beaconblockheader). Notice that objects expand to at most one object of a given expansion type. For example, `BeaconBlockHeader` objects uniquely expand to `BeaconBlock` objects.
+We similarly define "summary types" and "expansion types". For example, [`BeaconBlock`](./core/0_beacon-chain.md#beaconblock) is an expansion type of [`BeaconBlockHeader`](./core/0_beacon-chain.md#beaconblockheader). Notice that objects expand to at most one object of a given expansion type. For example, `BeaconBlockHeader` objects uniquely expand to `BeaconBlock` objects.
 
 ## Implementations

From c7af2496ef0f6cf113b52508e6b352378ea14746 Mon Sep 17 00:00:00 2001
From: vbuterin
Date: Wed, 14 Aug 2019 23:44:19 +0200
Subject: [PATCH 111/130] Update specs/light_client/merkle_proofs.md

Co-Authored-By: Diederik Loerakker
---
 specs/light_client/merkle_proofs.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md
index e0be4f070..698dce4e6 100644
--- a/specs/light_client/merkle_proofs.md
+++ b/specs/light_client/merkle_proofs.md
@@ -229,7 +229,7 @@ def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex
         x for x in all_indices if
         not (generalized_index_child(x, 0) in all_indices and generalized_index_child(x, 1) in all_indices) and
         not (x in indices)
-    ])[::-1]
+    ], reverse=True)
 ```
 
 Now we provide the Merkle proof verification functions. First, for single item proofs:

From 72103e9deb0aeec8ead093913265b920c3452cbe Mon Sep 17 00:00:00 2001
From: vbuterin
Date: Wed, 14 Aug 2019 23:44:26 +0200
Subject: [PATCH 112/130] Update specs/light_client/merkle_proofs.md

Co-Authored-By: Diederik Loerakker
---
 specs/light_client/merkle_proofs.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md
index 698dce4e6..21115dd27 100644
--- a/specs/light_client/merkle_proofs.md
+++ b/specs/light_client/merkle_proofs.md
@@ -256,7 +256,7 @@ def verify_merkle_multiproof(leaves: Sequence[Hash], proof: Sequence[Hash], indi
         **{index:node for index, node in zip(indices, leaves)},
         **{index:node for index, node in zip(helper_indices, proof)}
     }
-    keys = sorted(objects.keys())[::-1]
+    keys = sorted(objects.keys(), reverse=True)
     pos = 0
     while pos < len(keys):
         k = keys[pos]

From 722a69467fadc3fea659e2943bbda9cf976c5d31 Mon Sep 17 00:00:00 2001
From: Hsiao-Wei Wang
Date: Thu, 15 Aug 2019 15:07:44 +0800
Subject: [PATCH 113/130] Add `light_client/merkle_proofs.md` to executable
 stack. Errors revealed.

---
 Makefile              |  2 +-
 scripts/build_spec.py | 50 +++++++++++++++++++++++++++----------------
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/Makefile b/Makefile
index eeaed8898..fb93908cc 100644
--- a/Makefile
+++ b/Makefile
@@ -89,7 +89,7 @@ $(PY_SPEC_PHASE_0_TARGETS): $(PY_SPEC_PHASE_0_DEPS)
 	python3 $(SCRIPT_DIR)/build_spec.py -p0 $(SPEC_DIR)/core/0_beacon-chain.md $(SPEC_DIR)/core/0_fork-choice.md $(SPEC_DIR)/validator/0_beacon-chain-validator.md $@
 
 $(PY_SPEC_DIR)/eth2spec/phase1/spec.py: $(PY_SPEC_PHASE_1_DEPS)
-	python3 $(SCRIPT_DIR)/build_spec.py -p1 $(SPEC_DIR)/core/0_beacon-chain.md $(SPEC_DIR)/core/1_custody-game.md $(SPEC_DIR)/core/1_shard-data-chains.md $(SPEC_DIR)/core/0_fork-choice.md $@
+	python3 $(SCRIPT_DIR)/build_spec.py -p1 $(SPEC_DIR)/core/0_beacon-chain.md $(SPEC_DIR)/core/0_fork-choice.md $(SPEC_DIR)/core/1_custody-game.md $(SPEC_DIR)/core/1_shard-data-chains.md $(SPEC_DIR)/light_client/merkle_proofs.md $@
 
 CURRENT_DIR = ${CURDIR}
diff --git a/scripts/build_spec.py b/scripts/build_spec.py
index 88c3d46fb..9c5263399 100644
--- a/scripts/build_spec.py
+++ b/scripts/build_spec.py
@@ -281,17 +281,23 @@ def build_phase0_spec(phase0_sourcefile: str, fork_choice_sourcefile: str,
 
 
 def build_phase1_spec(phase0_sourcefile: str,
+                      fork_choice_sourcefile: str,
                       phase1_custody_sourcefile: str,
                       phase1_shard_sourcefile: str,
-                      fork_choice_sourcefile: str,
+                      merkle_proofs_sourcefile: str,
                       outfile: str=None) -> Optional[str]:
-    phase0_spec = get_spec(phase0_sourcefile)
-    remove_for_phase1(phase0_spec[0])
-    phase1_custody = get_spec(phase1_custody_sourcefile)
-    phase1_shard_data = get_spec(phase1_shard_sourcefile)
-    fork_choice_spec = get_spec(fork_choice_sourcefile)
-    spec_objects = phase0_spec
-    for value in [phase1_custody, phase1_shard_data, fork_choice_spec]:
+    all_sourcefiles = (
+        phase0_sourcefile,
+        fork_choice_sourcefile,
+        phase1_custody_sourcefile,
+        phase1_shard_sourcefile,
+        merkle_proofs_sourcefile,
+    )
+    all_spescs = [get_spec(spec) for spec in all_sourcefiles]
+    for spec in all_spescs:
+        remove_for_phase1(spec[0])
+    spec_objects = all_spescs[0]
+    for value in all_spescs[1:]:
         spec_objects = combine_spec_objects(spec_objects, value)
     spec = objects_to_spec(*spec_objects, PHASE1_IMPORTS)
     if outfile is not None:
@@ -304,17 +310,18 @@ if __name__ == '__main__':
 
     description = '''
 Build the specs from the md docs.
 If building phase 0:
-    1st argument is input spec.md
-    2nd argument is input fork_choice.md
-    3rd argument is input validator_guide.md
+    1st argument is input /core/0_beacon-chain.md
+    2nd argument is input /core/0_fork-choice.md
+    3rd argument is input /core/0_beacon-chain-validator.md
     4th argument is output spec.py
 If building phase 1:
-    1st argument is input spec_phase0.md
-    2nd argument is input spec_phase1_custody.md
-    3rd argument is input spec_phase1_shard_data.md
-    4th argument is input fork_choice.md
-    5th argument is output spec.py
+    1st argument is input /core/0_beacon-chain.md
+    2nd argument is input /core/0_fork-choice.md
+    3rd argument is input /core/1_custody-game.md
+    4th argument is input /core/1_shard-data-chains.md
+    5th argument is input /light_client/merkle_proofs.md
+    6th argument is output spec.py
 '''
     parser = ArgumentParser(description=description)
     parser.add_argument("-p", "--phase", dest="phase", type=int, default=0, help="Build for phase #")
@@ -327,10 +334,15 @@ If building phase 1:
         else:
             print(" Phase 0 requires spec, forkchoice, and v-guide inputs as well as an output file.")
     elif args.phase == 1:
-        if len(args.files) == 5:
+        if len(args.files) == 6:
             build_phase1_spec(*args.files)
         else:
-            print(" Phase 1 requires 4 input files as well as an output file: "
-                  + "(phase0.md and phase1.md, phase1.md, fork_choice.md, output.py)")
+            print(
+                " Phase 1 requires input files as well as an output file:\n"
+                "\t core/phase_0: (0_beacon-chain.md, 0_fork-choice.md)\n"
+                "\t core/phase_1: (1_custody-game.md, 1_shard-data-chains.md)\n"
+                "\t light_client: (merkle_proofs.md)\n"
+                "\t and output.py"
+            )
     else:
         print("Invalid phase: {0}".format(args.phase))

From dc933914213895e1eda337d8408a552e0aeb0548 Mon Sep 17 00:00:00 2001
From: Hsiao-Wei Wang
Date: Thu, 15 Aug 2019 15:30:01 +0800
Subject: [PATCH 114/130] Make flake8 check pass

---
 scripts/build_spec.py               |   9 +-
 specs/light_client/merkle_proofs.md | 130 +++++++++++++++++++---------
 2 files changed, 96 insertions(+), 43 deletions(-)

diff --git a/scripts/build_spec.py b/scripts/build_spec.py
index 9c5263399..410db2f21 100644
--- a/scripts/build_spec.py
+++ b/scripts/build_spec.py
@@ -39,6 +39,9 @@ from eth2spec.utils.hash_function import hash
 PHASE1_IMPORTS = '''from typing import (
     Any, Dict, Optional, Set, Sequence, MutableSequence, Tuple, Union,
 )
+from math import (
+    log2,
+)
 
 from dataclasses import (
     dataclass,
@@ -51,8 +54,10 @@ from eth2spec.utils.ssz.ssz_impl import (
     is_zero,
 )
 from eth2spec.utils.ssz.ssz_typing import (
-    uint64, bit, boolean, Container, List, Vector, Bytes, BytesN,
-    Bytes1, Bytes4, Bytes8, Bytes32, Bytes48, Bytes96, Bitlist, Bitvector,
+    BasicValue, Elements, BaseList, SSZType,
+    Container, List, Vector, Bytes, BytesN, Bitlist, Bitvector, Bits,
+    Bytes1, Bytes4, Bytes8, Bytes32, Bytes48, Bytes96,
+    uint64, bit, boolean,
 )
 from eth2spec.utils.bls import (
     bls_aggregate_pubkeys,
diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md
index 21115dd27..009f5a66f 100644
--- a/specs/light_client/merkle_proofs.md
+++ b/specs/light_client/merkle_proofs.md
@@ -6,17 +6,54 @@
 
 - [Merkle proof formats](#merkle-proof-formats)
-    - [Table of contents](#table-of-contents)
-    - [Constants](#constants)
-    - [Generalized Merkle tree index](#generalized-merkle-tree-index)
-    - [SSZ object to index](#ssz-object-to-index)
-    - [Merkle multiproofs](#merkle-multiproofs)
-    - [MerklePartial](#merklepartial)
-        - [`SSZMerklePartial`](#sszmerklepartial)
-        - [Proofs for execution](#proofs-for-execution)
+ - [Table of contents](#table-of-contents) + - [Custom types](#custom-types) + - [Helpers](#helpers) + - [Generalized Merkle tree index](#generalized-merkle-tree-index) + - [SSZ object to index](#ssz-object-to-index) + - [Helpers for generalized indices](#helpers-for-generalized-indices) + - [`concat_generalized_indices`](#concat_generalized_indices) + - [`get_generalized_index_length`](#get_generalized_index_length) + - [`get_generalized_index_bit`](#get_generalized_index_bit) + - [`generalized_index_sibling`](#generalized_index_sibling) + - [`generalized_index_child`](#generalized_index_child) + - [`generalized_index_parent`](#generalized_index_parent) + - [Merkle multiproofs](#merkle-multiproofs) +## Custom types + +We define the following Python custom types for type hinting and readability: + +| Name | SSZ equivalent | Description | +| - | - | - | +| `GeneralizedIndex` | `uint64` | the index of a node in a binary Merkle tree | + +## Helpers + +```python +def get_next_power_of_two(x: int) -> int: + """ + Get next power of 2 >= the input. + """ + if x <= 2: + return x + elif x % 2 == 0: + return 2 * get_next_power_of_two(x // 2) + else: + return 2 * get_next_power_of_two((x + 1) // 2) +``` + +```python +def get_previous_power_of_two(x: int) -> int: + """ + Get the previous power of 2 >= the input. + """ + assert x >= 2 + return get_next_power_of_two(x) // 2 +``` + ## Generalized Merkle tree index In a binary Merkle tree, we define a "generalized index" of a node as `2**depth + index`. Visually, this looks as follows: @@ -32,8 +69,8 @@ Note that the generalized index has the convenient property that the two childre ```python def merkle_tree(leaves: List[Bytes32]) -> List[Bytes32]: - padded_length = next_power_of_2(len(leaves)) - o = [ZERO_HASH] * padded_length + leaves + [ZERO_HASH] * (padded_length - len(leaves)) + padded_length = get_next_power_of_two(len(leaves)) + o = [Hash()] * padded_length + leaves + [Hash()] * (padded_length - len(leaves)) for i in range(len(leaves) - 1, 0, -1): o[i] = hash(o[i * 2] + o[i * 2 + 1]) return o @@ -61,25 +98,27 @@ We can now define a concept of a "path", a way of describing a function that tak ```python def item_length(typ: SSZType) -> int: """ - Returns the number of bytes in a basic type, or 32 (a full hash) for compound types. + Return the number of bytes in a basic type, or 32 (a full hash) for compound types. """ if issubclass(typ, BasicValue): return typ.byte_len else: return 32 - - -def get_elem_type(typ: ComplexType, index: Union[int, str]) -> Type: +``` + +```python +def get_elem_type(typ: Union[BaseList, Container], index: Union[int, str]) -> SSZType: """ - Returns the type of the element of an object of the given type with the given index + Return the type of the element of an object of the given type with the given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) """ - return typ.get_fields()[index] if issubclass(typ, Container) else typ.elem_type - + return typ.get_fields()[index] if issubclass(typ, Container) else typ.elem_type +``` +```python def chunk_count(typ: SSZType) -> int: """ - Returns the number of hashes needed to represent the top-level elements in the given type + Return the number of hashes needed to represent the top-level elements in the given type (eg. `x.foo` or `x[7]` but not `x[7].bar` or `x.foo.baz`). In all cases except lists/vectors of basic types, this is simply the number of top-level elements, as each element gets one hash. 
For lists/vectors of basic types, it is often fewer because multiple basic elements @@ -96,13 +135,16 @@ def chunk_count(typ: SSZType) -> int: return len(typ.get_fields()) else: raise Exception(f"Type not supported: {typ}") +``` - +```python def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, int]: """ - Returns three variables: (i) the index of the chunk in which the given element of the item is - represented, (ii) the starting byte position within the chunk, (iii) the ending byte position within the chunk. For example for - a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) + Return three variables: + (i) the index of the chunk in which the given element of the item is represented; + (ii) the starting byte position within the chunk; + (iii) the ending byte position within the chunk. + For example: for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) """ if issubclass(typ, Elements): start = index * item_length(typ.elem_type) @@ -111,9 +153,10 @@ def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, i return typ.get_field_names().index(index), 0, item_length(get_elem_type(typ, index)) else: raise Exception("Only lists/vectors/containers supported") +``` - -def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> GeneralizedIndex: +```python +def get_generalized_index(typ: SSZType, path: List[Union[int, str]]) -> GeneralizedIndex: """ Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. @@ -125,7 +168,7 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized typ, root = uint64, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None else: pos, _, _ = get_item_position(typ, p) - root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(chunk_count(typ)) + pos + root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * get_next_power_of_two(chunk_count(typ)) + pos typ = get_elem_type(typ, p) return root ``` @@ -144,7 +187,7 @@ def concat_generalized_indices(*indices: Sequence[GeneralizedIndex]) -> Generali """ o = GeneralizedIndex(1) for i in indices: - o = o * get_previous_power_of_2(i) + (i - get_previous_power_of_2(i)) + o = o * get_previous_power_of_two(i) + (i - get_previous_power_of_two(i)) return o ``` @@ -152,41 +195,41 @@ def concat_generalized_indices(*indices: Sequence[GeneralizedIndex]) -> Generali ```python def get_generalized_index_length(index: GeneralizedIndex) -> int: - """ - Returns the length of a path represented by a generalized index. - """ - return log2(index) + """ + Return the length of a path represented by a generalized index. + """ + return log2(index) ``` #### `get_generalized_index_bit` ```python def get_generalized_index_bit(index: GeneralizedIndex, position: int) -> bool: - """ - Returns the given bit of a generalized index. - """ - return (index & (1 << position)) > 0 + """ + Return the given bit of a generalized index. 
+ """ + return (index & (1 << position)) > 0 ``` #### `generalized_index_sibling` ```python def generalized_index_sibling(index: GeneralizedIndex) -> GeneralizedIndex: - return index ^ 1 + return index ^ 1 ``` #### `generalized_index_child` ```python def generalized_index_child(index: GeneralizedIndex, right_side: bool) -> GeneralizedIndex: - return index * 2 + right_side + return index * 2 + right_side ``` #### `generalized_index_parent` ```python def generalized_index_parent(index: GeneralizedIndex) -> GeneralizedIndex: - return index // 2 + return index // 2 ``` ## Merkle multiproofs @@ -214,7 +257,9 @@ def get_branch_indices(tree_index: GeneralizedIndex) -> List[GeneralizedIndex]: while o[-1] > 1: o.append(generalized_index_sibling(generalized_index_parent(o[-1]))) return o[:-1] +``` +```python def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex]: """ Get the generalized indices of all "extra" chunks in the tree needed to prove the chunks with the given @@ -224,7 +269,7 @@ def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex all_indices = set() for index in indices: all_indices = all_indices.union(set(get_branch_indices(index) + [index])) - + return sorted([ x for x in all_indices if not (generalized_index_child(x, 0) in all_indices and generalized_index_child(x, 1) in all_indices) and not @@ -248,13 +293,16 @@ def verify_merkle_proof(leaf: Hash, proof: Sequence[Hash], index: GeneralizedInd Now for multi-item proofs: ```python -def verify_merkle_multiproof(leaves: Sequence[Hash], proof: Sequence[Hash], indices: Sequence[GeneralizedIndex], root: Hash) -> bool: +def verify_merkle_multiproof(leaves: Sequence[Hash], + proof: Sequence[Hash], + indices: Sequence[GeneralizedIndex], + root: Hash) -> bool: assert len(leaves) == len(indices) helper_indices = get_helper_indices(indices) assert len(proof) == len(helper_indices) objects = { - **{index:node for index, node in zip(indices, leaves)}, - **{index:node for index, node in zip(helper_indices, proof)} + **{index: node for index, node in zip(indices, leaves)}, + **{index: node for index, node in zip(helper_indices, proof)} } keys = sorted(objects.keys(), reverse=True) pos = 0 From d88a83d48265656be5542a51cc6ddc5d444ceffc Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 15 Aug 2019 16:01:53 +0800 Subject: [PATCH 115/130] Fix most mypy errors --- specs/light_client/merkle_proofs.md | 39 +++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 009f5a66f..9d530f7c2 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -156,19 +156,20 @@ def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, i ``` ```python -def get_generalized_index(typ: SSZType, path: List[Union[int, str]]) -> GeneralizedIndex: +def get_generalized_index(typ: SSZType, path: List[Union[int, str]]) -> Optional[GeneralizedIndex]: """ Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. 
""" - root = 1 + root: Optional[GeneralizedIndex] = GeneralizedIndex(1) for p in path: assert not issubclass(typ, BasicValue) # If we descend to a basic type, the path cannot continue further if p == '__len__': typ, root = uint64, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None else: pos, _, _ = get_item_position(typ, p) - root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * get_next_power_of_two(chunk_count(typ)) + pos + base_index = (GeneralizedIndex(2) if issubclass(typ, (List, Bytes)) else GeneralizedIndex(1)) + root = root * base_index * get_next_power_of_two(chunk_count(typ)) + pos typ = get_elem_type(typ, p) return root ``` @@ -180,14 +181,14 @@ _Usage note: functions outside this section should manipulate generalized indice #### `concat_generalized_indices` ```python -def concat_generalized_indices(*indices: Sequence[GeneralizedIndex]) -> GeneralizedIndex: +def concat_generalized_indices(indices: Sequence[GeneralizedIndex]) -> GeneralizedIndex: """ Given generalized indices i1 for A -> B, i2 for B -> C .... i_n for Y -> Z, returns the generalized index for A -> Z. """ o = GeneralizedIndex(1) for i in indices: - o = o * get_previous_power_of_two(i) + (i - get_previous_power_of_two(i)) + o = GeneralizedIndex(o * get_previous_power_of_two(i) + (i - get_previous_power_of_two(i))) return o ``` @@ -198,7 +199,7 @@ def get_generalized_index_length(index: GeneralizedIndex) -> int: """ Return the length of a path represented by a generalized index. """ - return log2(index) + return int(log2(index)) ``` #### `get_generalized_index_bit` @@ -215,21 +216,21 @@ def get_generalized_index_bit(index: GeneralizedIndex, position: int) -> bool: ```python def generalized_index_sibling(index: GeneralizedIndex) -> GeneralizedIndex: - return index ^ 1 + return GeneralizedIndex(index ^ 1) ``` #### `generalized_index_child` ```python def generalized_index_child(index: GeneralizedIndex, right_side: bool) -> GeneralizedIndex: - return index * 2 + right_side + return GeneralizedIndex(index * 2 + right_side) ``` #### `generalized_index_parent` ```python def generalized_index_parent(index: GeneralizedIndex) -> GeneralizedIndex: - return index // 2 + return GeneralizedIndex(index // 2) ``` ## Merkle multiproofs @@ -266,14 +267,17 @@ def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex generalized indices. Note that the decreasing order is chosen deliberately to ensure equivalence to the order of hashes in a regular single-item Merkle proof in the single-item case. 
""" - all_indices = set() + all_indices: Set[GeneralizedIndex] = set() for index in indices: all_indices = all_indices.union(set(get_branch_indices(index) + [index])) return sorted([ - x for x in all_indices if not - (generalized_index_child(x, 0) in all_indices and generalized_index_child(x, 1) in all_indices) and not - (x in indices) + x for x in all_indices if ( + not ( + generalized_index_child(x, GeneralizedIndex(0)) in all_indices and + generalized_index_child(x, GeneralizedIndex(1)) in all_indices + ) and not (x in indices) + ) ], reverse=True) ``` @@ -309,10 +313,13 @@ def verify_merkle_multiproof(leaves: Sequence[Hash], while pos < len(keys): k = keys[pos] if k in objects and k ^ 1 in objects and k // 2 not in objects: - objects[k // 2] = hash(objects[(k | 1) ^ 1] + objects[k | 1]) - keys.append(k // 2) + objects[GeneralizedIndex(k // 2)] = hash( + objects[GeneralizedIndex((k | 1) ^ 1)] + + objects[GeneralizedIndex(k | 1)] + ) + keys.append(GeneralizedIndex(k // 2)) pos += 1 - return objects[1] == root + return objects[GeneralizedIndex(1)] == root ``` Note that the single-item proof is a special case of a multi-item proof; a valid single-item proof verifies correctly when put into the multi-item verification function (making the natural trivial changes to input arguments, `index -> [index]` and `leaf -> [leaf]`). From 0f52d460a5649805296e497bb7820eb55b22caef Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 15 Aug 2019 16:14:07 +0800 Subject: [PATCH 116/130] Use the `get_previous_power_of_2` function in ethereum/eth2.0-specs#1323 --- specs/light_client/merkle_proofs.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 9d530f7c2..faad40c45 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -50,8 +50,7 @@ def get_previous_power_of_two(x: int) -> int: """ Get the previous power of 2 >= the input. 
""" - assert x >= 2 - return get_next_power_of_two(x) // 2 + return x if x <= 2 else 2 * get_previous_power_of_2(x // 2) ``` ## Generalized Merkle tree index From 2741a5f33dfa0a19fb2185705e5bd1cc4435baa0 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 15 Aug 2019 18:26:22 +0800 Subject: [PATCH 117/130] Minor fixes --- scripts/build_spec.py | 2 +- specs/light_client/merkle_proofs.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 410db2f21..07306af8a 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -343,7 +343,7 @@ If building phase 1: build_phase1_spec(*args.files) else: print( - " Phase 1 requires input files as well as an output file:\n" + " Phase 1 requires input files as well as an output file:\n" "\t core/phase_0: (0_beacon-chain.md, 0_fork-choice.md)\n" "\t core/phase_1: (1_custody-game.md, 1_shard-data-chains.md)\n" "\t light_client: (merkle_proofs.md)\n" diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index faad40c45..d6c6dee62 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -8,7 +8,7 @@ - [Merkle proof formats](#merkle-proof-formats) - [Table of contents](#table-of-contents) - [Custom types](#custom-types) - - [Helpers](#helpers) + - [Helper functions](#helper-functions) - [Generalized Merkle tree index](#generalized-merkle-tree-index) - [SSZ object to index](#ssz-object-to-index) - [Helpers for generalized indices](#helpers-for-generalized-indices) @@ -30,7 +30,7 @@ We define the following Python custom types for type hinting and readability: | - | - | - | | `GeneralizedIndex` | `uint64` | the index of a node in a binary Merkle tree | -## Helpers +## Helper functions ```python def get_next_power_of_two(x: int) -> int: @@ -67,7 +67,7 @@ In a binary Merkle tree, we define a "generalized index" of a node as `2**depth Note that the generalized index has the convenient property that the two children of node `k` are `2k` and `2k+1`, and also that it equals the position of a node in the linear representation of the Merkle tree that's computed by this function: ```python -def merkle_tree(leaves: List[Bytes32]) -> List[Bytes32]: +def merkle_tree(leaves: Squence[Hash]) -> Squence[Hash]: padded_length = get_next_power_of_two(len(leaves)) o = [Hash()] * padded_length + leaves + [Hash()] * (padded_length - len(leaves)) for i in range(len(leaves) - 1, 0, -1): From 8e1333aad198025cb43640b07f52e7e8deeaa76b Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 15 Aug 2019 19:01:40 +0800 Subject: [PATCH 118/130] Add `SSZVariableName` custom type --- scripts/build_spec.py | 1 + test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 07306af8a..10e6034f2 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -55,6 +55,7 @@ from eth2spec.utils.ssz.ssz_impl import ( ) from eth2spec.utils.ssz.ssz_typing import ( BasicValue, Elements, BaseList, SSZType, + SSZVariableName, Container, List, Vector, Bytes, BytesN, Bitlist, Bitvector, Bits, Bytes1, Bytes4, Bytes8, Bytes32, Bytes48, Bytes96, uint64, bit, boolean, diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py index 1f199e6e1..bcccb91b2 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py @@ -1,8 +1,11 @@ -from 
typing import Dict, Iterator +from typing import Dict, Iterator, NewType import copy from types import GeneratorType +SSZVariableName = NewType('SSZVariableName', str) + + class DefaultingTypeMeta(type): def default(cls): raise Exception("Not implemented") From bb0b5b09cc8b343f2b3a25fd18db5ba1b8a11a6b Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 15 Aug 2019 19:02:21 +0800 Subject: [PATCH 119/130] Use `SSZVariableName` instead of `str`, and fix some mypy errors --- specs/light_client/merkle_proofs.md | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index d6c6dee62..73c4c603d 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -26,7 +26,6 @@ We define the following Python custom types for type hinting and readability: -| Name | SSZ equivalent | Description | | - | - | - | | `GeneralizedIndex` | `uint64` | the index of a node in a binary Merkle tree | @@ -50,7 +49,7 @@ def get_previous_power_of_two(x: int) -> int: """ Get the previous power of 2 >= the input. """ - return x if x <= 2 else 2 * get_previous_power_of_2(x // 2) + return x if x <= 2 else 2 * get_previous_power_of_two(x // 2) ``` ## Generalized Merkle tree index @@ -67,9 +66,9 @@ In a binary Merkle tree, we define a "generalized index" of a node as `2**depth Note that the generalized index has the convenient property that the two children of node `k` are `2k` and `2k+1`, and also that it equals the position of a node in the linear representation of the Merkle tree that's computed by this function: ```python -def merkle_tree(leaves: Squence[Hash]) -> Squence[Hash]: +def merkle_tree(leaves: Sequence[Hash]) -> Sequence[Hash]: padded_length = get_next_power_of_two(len(leaves)) - o = [Hash()] * padded_length + leaves + [Hash()] * (padded_length - len(leaves)) + o = [Hash()] * padded_length + list(leaves) + [Hash()] * (padded_length - len(leaves)) for i in range(len(leaves) - 1, 0, -1): o[i] = hash(o[i * 2] + o[i * 2 + 1]) return o @@ -106,12 +105,12 @@ def item_length(typ: SSZType) -> int: ``` ```python -def get_elem_type(typ: Union[BaseList, Container], index: Union[int, str]) -> SSZType: +def get_elem_type(typ: Union[BaseList, Container], index_or_variable_name: Union[int, SSZVariableName]) -> SSZType: """ Return the type of the element of an object of the given type with the given index or member variable name (eg. 
`7` for `x[7]`, `"foo"` for `x.foo`) """ - return typ.get_fields()[index] if issubclass(typ, Container) else typ.elem_type + return typ.get_fields()[index_or_variable_name] if issubclass(typ, Container) else typ.elem_type ``` ```python @@ -137,7 +136,7 @@ def chunk_count(typ: SSZType) -> int: ``` ```python -def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, int]: +def get_item_position(typ: SSZType, index_or_variable_name: Union[int, SSZVariableName]) -> Tuple[int, int, int]: """ Return three variables: (i) the index of the chunk in which the given element of the item is represented; @@ -146,16 +145,18 @@ def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, i For example: for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) """ if issubclass(typ, Elements): + index = int(index_or_variable_name) start = index * item_length(typ.elem_type) return start // 32, start % 32, start % 32 + item_length(typ.elem_type) elif issubclass(typ, Container): - return typ.get_field_names().index(index), 0, item_length(get_elem_type(typ, index)) + variable_name = int(index_or_variable_name) + return typ.get_field_names().index(variable_name), 0, item_length(get_elem_type(typ, variable_name)) else: raise Exception("Only lists/vectors/containers supported") ``` ```python -def get_generalized_index(typ: SSZType, path: List[Union[int, str]]) -> Optional[GeneralizedIndex]: +def get_generalized_index(typ: SSZType, path: Sequence[Union[int, SSZVariableName]]) -> Optional[GeneralizedIndex]: """ Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. @@ -248,7 +249,7 @@ x x . . . . x * First, we provide a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: ```python -def get_branch_indices(tree_index: GeneralizedIndex) -> List[GeneralizedIndex]: +def get_branch_indices(tree_index: GeneralizedIndex) -> Sequence[GeneralizedIndex]: """ Get the generalized indices of the sister chunks along the path from the chunk with the given tree index to the root. @@ -260,7 +261,7 @@ def get_branch_indices(tree_index: GeneralizedIndex) -> List[GeneralizedIndex]: ``` ```python -def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex]: +def get_helper_indices(indices: Sequence[GeneralizedIndex]) -> Sequence[GeneralizedIndex]: """ Get the generalized indices of all "extra" chunks in the tree needed to prove the chunks with the given generalized indices. 
Note that the decreasing order is chosen deliberately to ensure equivalence to the @@ -268,7 +269,7 @@ def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex """ all_indices: Set[GeneralizedIndex] = set() for index in indices: - all_indices = all_indices.union(set(get_branch_indices(index) + [index])) + all_indices = all_indices.union(set(list(get_branch_indices(index)) + [index])) return sorted([ x for x in all_indices if ( From 24e583d5d9e2f8fbcd1a7ceca7e6d2672d222027 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Fri, 16 Aug 2019 11:01:48 -0600 Subject: [PATCH 120/130] add discord and add some external resources --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ae6156e7..acc60a0cf 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Ethereum 2.0 Specifications -[![Join the chat at https://gitter.im/ethereum/sharding](https://badges.gitter.im/ethereum/sharding.svg)](https://gitter.im/ethereum/sharding?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![Join the chat at https://discord.gg/hpFs23p](https://img.shields.io/badge/chat-on%20discord-blue.svg)](https://discord.gg/hpFs23p) [![Join the chat at https://gitter.im/ethereum/sharding](https://badges.gitter.im/ethereum/sharding.svg)](https://gitter.im/ethereum/sharding?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) To learn more about sharding and Ethereum 2.0 (Serenity), see the [sharding FAQ](https://github.com/ethereum/wiki/wiki/Sharding-FAQ) and the [research compendium](https://notes.ethereum.org/s/H1PGqDhpm). @@ -47,8 +47,15 @@ The following are the broad design goals for Ethereum 2.0: * to allow for a typical consumer laptop with `O(C)` resources to process/validate `O(1)` shards (including any system level validation such as the beacon chain) +## Useful external resources + +* [Design Rationale](https://notes.ethereum.org/s/rkhCgQteN#) +* [Phase 0 Onboarding Document](https://notes.ethereum.org/s/Bkn3zpwxB) + + ## For spec contributors + Documentation on the different components used during spec writing can be found here: * [YAML Test Generators](test_generators/README.md) * [Executable Python Spec, with Py-tests](test_libs/pyspec/README.md) From 35d89e2706a3f482a1594cfdf08123093edcbf32 Mon Sep 17 00:00:00 2001 From: Martin Lundfall Date: Mon, 19 Aug 2019 13:03:51 +0200 Subject: [PATCH 121/130] merkle_proofs, simple-serialize, test_gen/README: update ToC --- specs/light_client/merkle_proofs.md | 19 +++++++++++-------- specs/simple-serialize.md | 1 + test_generators/README.md | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 21115dd27..345435133 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -6,14 +6,17 @@ - [Merkle proof formats](#merkle-proof-formats) - - [Table of contents](#table-of-contents) - - [Constants](#constants) - - [Generalized Merkle tree index](#generalized-merkle-tree-index) - - [SSZ object to index](#ssz-object-to-index) - - [Merkle multiproofs](#merkle-multiproofs) - - [MerklePartial](#merklepartial) - - [`SSZMerklePartial`](#sszmerklepartial) - - [Proofs for execution](#proofs-for-execution) + - [Table of contents](#table-of-contents) + - [Generalized Merkle tree index](#generalized-merkle-tree-index) + - [SSZ object to index](#ssz-object-to-index) + - [Helpers for generalized 
indices](#helpers-for-generalized-indices) + - [`concat_generalized_indices`](#concat_generalized_indices) + - [`get_generalized_index_length`](#get_generalized_index_length) + - [`get_generalized_index_bit`](#get_generalized_index_bit) + - [`generalized_index_sibling`](#generalized_index_sibling) + - [`generalized_index_child`](#generalized_index_child) + - [`generalized_index_parent`](#generalized_index_parent) + - [Merkle multiproofs](#merkle-multiproofs) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 50d091c07..5b8e5e8f4 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -26,6 +26,7 @@ - [Deserialization](#deserialization) - [Merkleization](#merkleization) - [Self-signed containers](#self-signed-containers) + - [Summaries and expansions](#summaries-and-expansions) - [Implementations](#implementations) diff --git a/test_generators/README.md b/test_generators/README.md index 7a4a5c536..abcb8a1ee 100644 --- a/test_generators/README.md +++ b/test_generators/README.md @@ -9,6 +9,24 @@ On releases, test generators are run by the release manager. Test-generation of An automated nightly tests release system, with a config filter applied, is being considered as implementation needs mature. +## Table of contents + + + + + +- [How to run generators](#how-to-run-generators) + - [Cleaning](#cleaning) + - [Running all test generators](#running-all-test-generators) + - [Running a single generator](#running-a-single-generator) +- [Developing a generator](#developing-a-generator) +- [How to add a new test generator](#how-to-add-a-new-test-generator) +- [How to remove a test generator](#how-to-remove-a-test-generator) + + + + + ## How to run generators Prerequisites: From 62d37593fbd3ed53fd45e6ba80b0ba2f2130b676 Mon Sep 17 00:00:00 2001 From: Martin Lundfall Date: Mon, 19 Aug 2019 13:05:44 +0200 Subject: [PATCH 122/130] Correct various typos --- scripts/build_spec.py | 2 +- test_libs/pyspec/eth2spec/test/context.py | 2 +- test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py | 2 +- test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 88c3d46fb..e6a95e028 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -210,7 +210,7 @@ ignored_dependencies = [ def dependency_order_ssz_objects(objects: Dict[str, str], custom_types: Dict[str, str]) -> None: """ - Determines which SSZ Object is depenedent on which other and orders them appropriately + Determines which SSZ Object is dependent on which other and orders them appropriately """ items = list(objects.items()) for key, value in items: diff --git a/test_libs/pyspec/eth2spec/test/context.py b/test_libs/pyspec/eth2spec/test/context.py index 5a0ddb59d..5cc42c510 100644 --- a/test_libs/pyspec/eth2spec/test/context.py +++ b/test_libs/pyspec/eth2spec/test/context.py @@ -101,7 +101,7 @@ all_phases = ['phase0', 'phase1'] def with_all_phases(fn): """ - A decorator for running a test wil every phase + A decorator for running a test with every phase """ return with_phases(all_phases)(fn) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py index 1f199e6e1..891633afe 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py @@ -344,7 +344,7 @@ class BaseList(list, Elements): return super().__iter__() def last(self): - # be explict about getting the last item, for the 
non-python readers, and negative-index safety + # be explicit about getting the last item, for the non-python readers, and negative-index safety return self[len(self) - 1] diff --git a/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py b/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py index f746a29c9..d5a53c5fa 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py @@ -222,7 +222,7 @@ def test_bytesn_subclass(): def test_uint_math(): - assert uint8(0) + uint8(uint32(16)) == uint8(16) # allow explict casting to make invalid addition valid + assert uint8(0) + uint8(uint32(16)) == uint8(16) # allow explicit casting to make invalid addition valid expect_value_error(lambda: uint8(0) - uint8(1), "no underflows allowed") expect_value_error(lambda: uint8(1) + uint8(255), "no overflows allowed") From 6722608978677fa0991875748e61afe0dbedb9ea Mon Sep 17 00:00:00 2001 From: Martin Lundfall Date: Mon, 19 Aug 2019 13:06:21 +0200 Subject: [PATCH 123/130] Add codespell whitelist --- .codespell-whitelist | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .codespell-whitelist diff --git a/.codespell-whitelist b/.codespell-whitelist new file mode 100644 index 000000000..ff694e380 --- /dev/null +++ b/.codespell-whitelist @@ -0,0 +1,2 @@ +uint +byteorder \ No newline at end of file From a843e9aeeba3312a3cc2b738fbc7dfeb21a24fa5 Mon Sep 17 00:00:00 2001 From: Martin Lundfall Date: Mon, 19 Aug 2019 13:47:09 +0200 Subject: [PATCH 124/130] specs/ fix links --- specs/core/0_beacon-chain.md | 2 +- specs/networking/p2p-interface.md | 2 +- specs/simple-serialize.md | 2 +- specs/validator/0_beacon-chain-validator.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index a42465ad4..7ed3226e1 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -169,7 +169,7 @@ The following values are (non-configurable) constants used throughout the specif ## Configuration -*Note*: The default mainnet configuration values are included here for spec-design purposes. The different configurations for mainnet, testnets, and YAML-based testing can be found in the [`configs/constant_presets`](../../configs/constant_presets) directory. These configurations are updated for releases and may be out of sync during `dev` changes. +*Note*: The default mainnet configuration values are included here for spec-design purposes. The different configurations for mainnet, testnets, and YAML-based testing can be found in the [`configs/constant_presets`](../../configs) directory. These configurations are updated for releases and may be out of sync during `dev` changes. ### Misc diff --git a/specs/networking/p2p-interface.md b/specs/networking/p2p-interface.md index cdad92085..b3e0db50e 100644 --- a/specs/networking/p2p-interface.md +++ b/specs/networking/p2p-interface.md @@ -301,7 +301,7 @@ Here, `result` represents the 1-byte response code. The token of the negotiated protocol ID specifies the type of encoding to be used for the req/resp interaction. Two values are possible at this time: -- `ssz`: The contents are [SSZ-encoded](#ssz-encoding). This encoding type MUST be supported by all clients. For objects containing a single field, only the field is SSZ-encoded not a container with a single field. For example, the `BeaconBlocks` response would be an SSZ-encoded list of `BeaconBlock`s. 
All SSZ-Lists in the Req/Resp domain will have a maximum list size of `SSZ_MAX_LIST_SIZE`. +- `ssz`: The contents are [SSZ-encoded](../simple-serialize.md). This encoding type MUST be supported by all clients. For objects containing a single field, only the field is SSZ-encoded not a container with a single field. For example, the `BeaconBlocks` response would be an SSZ-encoded list of `BeaconBlock`s. All SSZ-Lists in the Req/Resp domain will have a maximum list size of `SSZ_MAX_LIST_SIZE`. - `ssz_snappy`: The contents are SSZ-encoded and then compressed with [Snappy](https://github.com/google/snappy). MAY be supported in the interoperability testnet; MUST be supported in mainnet. #### SSZ-encoding strategy (with or without Snappy) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 5b8e5e8f4..588200f20 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -231,7 +231,7 @@ We similarly define "summary types" and "expansion types". For example, [`Beacon | Python | Ethereum 2.0 | Ethereum Foundation | [https://github.com/ethereum/py-ssz](https://github.com/ethereum/py-ssz) | | Rust | Lighthouse | Sigma Prime | [https://github.com/sigp/lighthouse/tree/master/eth2/utils/ssz](https://github.com/sigp/lighthouse/tree/master/eth2/utils/ssz) | | Nim | Nimbus | Status | [https://github.com/status-im/nim-beacon-chain/blob/master/beacon_chain/ssz.nim](https://github.com/status-im/nim-beacon-chain/blob/master/beacon_chain/ssz.nim) | -| Rust | Shasper | ParityTech | [https://github.com/paritytech/shasper/tree/master/utils/ssz](https://github.com/paritytech/shasper/tree/master/util/ssz) | +| Rust | Shasper | ParityTech | [https://github.com/paritytech/shasper/tree/master/utils/ssz](https://github.com/paritytech/shasper/tree/master/utils/ssz) | | TypeScript | Lodestar | ChainSafe Systems | [https://github.com/ChainSafe/ssz-js](https://github.com/ChainSafe/ssz-js) | | Java | Cava | ConsenSys | [https://www.github.com/ConsenSys/cava/tree/master/ssz](https://www.github.com/ConsenSys/cava/tree/master/ssz) | | Go | Prysm | Prysmatic Labs | [https://github.com/prysmaticlabs/go-ssz](https://github.com/prysmaticlabs/go-ssz) | diff --git a/specs/validator/0_beacon-chain-validator.md b/specs/validator/0_beacon-chain-validator.md index 188a6a291..ef5ad4415 100644 --- a/specs/validator/0_beacon-chain-validator.md +++ b/specs/validator/0_beacon-chain-validator.md @@ -266,7 +266,7 @@ Up to `MAX_ATTESTATIONS`, aggregate attestations can be included in the `block`. ##### Deposits -If there are any unprocessed deposits for the existing `state.eth1_data` (i.e. `state.eth1_data.deposit_count > state.eth1_deposit_index`), then pending deposits _must_ be added to the block. The expected number of deposits is exactly `min(MAX_DEPOSITS, eth1_data.deposit_count - state.eth1_deposit_index)`. These [`deposits`](../core/0_beacon-chain.md#deposit) are constructed from the `Deposit` logs from the [Eth 1.0 deposit contract](../core/0_deposit-contract) and must be processed in sequential order. The deposits included in the `block` must satisfy the verification conditions found in [deposits processing](../core/0_beacon-chain.md#deposits). +If there are any unprocessed deposits for the existing `state.eth1_data` (i.e. `state.eth1_data.deposit_count > state.eth1_deposit_index`), then pending deposits _must_ be added to the block. The expected number of deposits is exactly `min(MAX_DEPOSITS, eth1_data.deposit_count - state.eth1_deposit_index)`. 
These [`deposits`](../core/0_beacon-chain.md#deposit) are constructed from the `Deposit` logs from the [Eth 1.0 deposit contract](../core/0_deposit-contract.md) and must be processed in sequential order. The deposits included in the `block` must satisfy the verification conditions found in [deposits processing](../core/0_beacon-chain.md#deposits). The `proof` for each deposit must be constructed against the deposit root contained in `state.eth1_data` rather than the deposit root at the time the deposit was initially logged from the 1.0 chain. This entails storing a full deposit merkle tree locally and computing updated proofs against the `eth1_data.deposit_root` as needed. See [`minimal_merkle.py`](https://github.com/ethereum/research/blob/master/spec_pythonizer/utils/merkle_minimal.py) for a sample implementation. From 663d43d07f6bd4f263e4b7f5831747a28c6ac943 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Aug 2019 18:55:30 +0800 Subject: [PATCH 125/130] PR feedback, fix type hinting, add missing `Container.get_field_names()` method --- scripts/build_spec.py | 4 +++- specs/light_client/merkle_proofs.md | 19 ++++++++++++------- .../pyspec/eth2spec/utils/ssz/ssz_typing.py | 11 +++++++---- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 10e6034f2..28022a752 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -55,7 +55,6 @@ from eth2spec.utils.ssz.ssz_impl import ( ) from eth2spec.utils.ssz.ssz_typing import ( BasicValue, Elements, BaseList, SSZType, - SSZVariableName, Container, List, Vector, Bytes, BytesN, Bitlist, Bitvector, Bits, Bytes1, Bytes4, Bytes8, Bytes32, Bytes48, Bytes96, uint64, bit, boolean, @@ -68,6 +67,9 @@ from eth2spec.utils.bls import ( ) from eth2spec.utils.hash_function import hash + + +SSZVariableName = str ''' SUNDRY_CONSTANTS_FUNCTIONS = ''' def ceillog2(x: uint64) -> int: diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 73c4c603d..2a4e100d6 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -38,8 +38,6 @@ def get_next_power_of_two(x: int) -> int: """ if x <= 2: return x - elif x % 2 == 0: - return 2 * get_next_power_of_two(x // 2) else: return 2 * get_next_power_of_two((x + 1) // 2) ``` @@ -49,7 +47,10 @@ def get_previous_power_of_two(x: int) -> int: """ Get the previous power of 2 >= the input. """ - return x if x <= 2 else 2 * get_previous_power_of_two(x // 2) + if x <= 2: + return x + else: + return 2 * get_previous_power_of_two(x // 2) ``` ## Generalized Merkle tree index @@ -91,7 +92,7 @@ y_data_root len(y) ....... ``` -We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values, in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) `[0]`, `[1, 2**64-1]`, `[1, 5, 0]`. +We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. 
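As a quick illustration of such paths (a sketch only; `foo` stands for any SSZ object shaped like the example above, with fields `x` and `y`):

```python
# Each path is simply a function from an SSZ object to one of its
# (possibly deeply nested) members:
def to_x(foo):        # the path foo -> foo.x
    return foo.x

def to_len_y(foo):    # the path foo -> len(foo.y)
    return len(foo.y)

def to_y5_w(foo):     # the path foo -> foo.y[5].w
    return foo.y[5].w
```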
We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values, in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) `[0]`, `[1, 2**64-1]`, `[1, 5, 0]`. We define `SSZVariableName` as the member variable name string, i.e., a path is presented as a sequence of integers and `SSZVariableName`. ```python def item_length(typ: SSZType) -> int: @@ -149,7 +150,7 @@ def get_item_position(typ: SSZType, index_or_variable_name: Union[int, SSZVariab start = index * item_length(typ.elem_type) return start // 32, start % 32, start % 32 + item_length(typ.elem_type) elif issubclass(typ, Container): - variable_name = int(index_or_variable_name) + variable_name = index_or_variable_name return typ.get_field_names().index(variable_name), 0, item_length(get_elem_type(typ, variable_name)) else: raise Exception("Only lists/vectors/containers supported") @@ -161,11 +162,15 @@ def get_generalized_index(typ: SSZType, path: Sequence[Union[int, SSZVariableNam Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. """ - root: Optional[GeneralizedIndex] = GeneralizedIndex(1) + root = GeneralizedIndex(1) for p in path: assert not issubclass(typ, BasicValue) # If we descend to a basic type, the path cannot continue further if p == '__len__': - typ, root = uint64, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None + typ = uint64 + if issubclass(typ, (List, Bytes)): + root = GeneralizedIndex(root * 2 + 1) + else: + return None else: pos, _, _ = get_item_position(typ, p) base_index = (GeneralizedIndex(2) if issubclass(typ, (List, Bytes)) else GeneralizedIndex(1)) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py index bcccb91b2..ff942b84d 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py @@ -1,11 +1,8 @@ -from typing import Dict, Iterator, NewType +from typing import Dict, Iterator, Iterable import copy from types import GeneratorType -SSZVariableName = NewType('SSZVariableName', str) - - class DefaultingTypeMeta(type): def default(cls): raise Exception("Not implemented") @@ -198,6 +195,12 @@ class Container(Series, metaclass=SSZType): return {} return dict(cls.__annotations__) + @classmethod + def get_field_names(cls) -> Iterable[SSZType]: + if not hasattr(cls, '__annotations__'): # no container fields + return () + return list(cls.__annotations__.keys()) + @classmethod def default(cls): return cls(**{f: t.default() for f, t in cls.get_fields().items()}) From b22caeb2463477b9ce5402a258f92782c915834d Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Aug 2019 19:09:21 +0800 Subject: [PATCH 126/130] Add basic merkle proofs tests --- .../eth2spec/test/merkle_proofs/__init__.py | 0 .../test/merkle_proofs/test_merkle_proofs.py | 98 +++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 test_libs/pyspec/eth2spec/test/merkle_proofs/__init__.py create mode 100644 test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py diff --git a/test_libs/pyspec/eth2spec/test/merkle_proofs/__init__.py b/test_libs/pyspec/eth2spec/test/merkle_proofs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py b/test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py new file mode 100644 index 000000000..5e2c4046b --- /dev/null +++ b/test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py @@ -0,0 +1,98 @@ + +import re +from eth_utils import ( + to_tuple, +) + +from eth2spec.test.context import ( + spec_state_test, + with_all_phases_except, +) +from eth2spec.utils.ssz.ssz_typing import ( + Bytes32, + Container, + List, + uint64, +) + + +class Foo(Container): + x: uint64 + y: List[Bytes32, 2] + +# Tree +# root +# / \ +# x y_root +# / \ +# y_data_root len(y) +# / \ +# / \ / \ +# +# Generalized indices +# 1 +# / \ +# 2 (x) 3 (y_root) +# / \ +# 6 7 +# / \ +# 12 13 + + +@to_tuple +def ssz_object_to_path(start, end): + is_len = False + len_findall = re.findall(r"(?<=len\().*(?=\))", end) + if len_findall: + is_len = True + end = len_findall[0] + + route = '' + if end.startswith(start): + route = end[len(start):] + + segments = route.split('.') + for word in segments: + index_match = re.match(r"(\w+)\[(\d+)]", word) + if index_match: + yield from index_match.groups() + elif len(word): + yield word + if is_len: + yield '__len__' + + +to_path_test_cases = [ + ('foo', 'foo.x', ('x',)), + ('foo', 'foo.x[100].y', ('x', '100', 'y')), + ('foo', 'foo.x[100].y[1].z[2]', ('x', '100', 'y', '1', 'z', '2')), + ('foo', 'len(foo.x[100].y[1].z[2])', ('x', '100', 'y', '1', 'z', '2', '__len__')), +] + + +def test_to_path(): + for test_case in to_path_test_cases: + start, end, expected = test_case + assert ssz_object_to_path(start, end) == expected + + +generalized_index_cases = [ + (Foo, ('x',), 2), + (Foo, ('y',), 3), + (Foo, ('y', 0), 12), + (Foo, ('y', 1), 13), + (Foo, ('y', '__len__'), None), +] + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_get_generalized_index(spec, state): + for typ, path, generalized_index in generalized_index_cases: + assert spec.get_generalized_index( + typ=typ, + path=path, + ) == generalized_index + yield 'typ', typ + yield 'path', path + yield 'generalized_index', generalized_index From d6bbd9bfa10204e7f0ec2a97f575ed55072b8cdc Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Aug 2019 19:21:12 +0800 Subject: [PATCH 127/130] Add `BaseBytes` to cover `Bytes` and `BytesN` --- scripts/build_spec.py | 2 +- specs/light_client/merkle_proofs.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 202801d09..0a5171e8f 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -54,7 +54,7 @@ from eth2spec.utils.ssz.ssz_impl import ( is_zero, ) from eth2spec.utils.ssz.ssz_typing import ( - BasicValue, Elements, BaseList, SSZType, + BasicValue, Elements, BaseBytes, BaseList, SSZType, Container, List, Vector, Bytes, BytesN, Bitlist, Bitvector, Bits, Bytes1, Bytes4, Bytes8, Bytes32, Bytes48, Bytes96, uint64, bit, boolean, diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 2a4e100d6..d7f0ab382 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -106,7 +106,8 @@ def item_length(typ: SSZType) -> int: ``` ```python -def get_elem_type(typ: Union[BaseList, Container], index_or_variable_name: Union[int, SSZVariableName]) -> SSZType: +def get_elem_type(typ: Union[BaseBytes, BaseList, Container], + index_or_variable_name: Union[int, SSZVariableName]) -> SSZType: """ Return the type of the element of an object of the given type with the 
given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) From 5fcfcac75e093af2c39ec860e3d35088a96a2d5c Mon Sep 17 00:00:00 2001 From: vbuterin Date: Tue, 20 Aug 2019 14:33:29 +0200 Subject: [PATCH 128/130] Updated light client sync for newer committees (#1316) --- specs/light_client/sync_protocol.md | 300 +++++++++++++--------------- 1 file changed, 144 insertions(+), 156 deletions(-) diff --git a/specs/light_client/sync_protocol.md b/specs/light_client/sync_protocol.md index 580b669f2..944abf8c1 100644 --- a/specs/light_client/sync_protocol.md +++ b/specs/light_client/sync_protocol.md @@ -1,199 +1,187 @@ -# Beacon Chain Light Client Syncing +# Minimal Light Client Design -**Notice**: This document is a work-in-progress for researchers and implementers. One of the design goals of the Eth 2.0 beacon chain is light-client friendliness, not only to allow low-resource clients (mobile phones, IoT, etc.) to maintain access to the blockchain in a reasonably safe way, but also to facilitate the development of "bridges" between the Eth 2.0 beacon chain and other chains. +**Notice**: This document is a work-in-progress for researchers and implementers. ## Table of contents -- [Beacon Chain Light Client Syncing](#beacon-chain-light-client-syncing) +- [Minimal Light Client Design](#minimal-light-client-design) - [Table of contents](#table-of-contents) - - [Preliminaries](#preliminaries) - - [Expansions](#expansions) - - [`get_active_validator_indices`](#get_active_validator_indices) - - [`MerklePartial`](#merklepartial) - - [`PeriodData`](#perioddata) - - [`get_earlier_start_epoch`](#get_earlier_start_epoch) - - [`get_later_start_epoch`](#get_later_start_epoch) - - [`get_period_data`](#get_period_data) - - [Light client state](#light-client-state) - - [Updating the shuffled committee](#updating-the-shuffled-committee) - - [Computing the current committee](#computing-the-current-committee) - - [Verifying blocks](#verifying-blocks) + - [Introduction](#introduction) + - [Custom types](#custom-types) + - [Constants](#constants) + - [Containers](#containers) + - [`LightClientUpdate`](#lightclientupdate) + - [Helpers](#helpers) + - [`LightClientMemory`](#lightclientmemory) + - [`unpack_compact_validator`](#unpack_compact_validator) + - [`get_persistent_committee_pubkeys_and_balances`](#get_persistent_committee_pubkeys_and_balances) + - [Light client state updates](#light-client-state-updates) + - [Data overhead](#data-overhead) -## Preliminaries +## Introduction -### Expansions +Ethereum 2.0 is designed to be light client friendly. This allows low-resource clients such as mobile phones to access Ethereum 2.0 with reasonable safety and liveness. It also facilitates the development of "bridges" to external blockchains. This document suggests a minimal light client design for the beacon chain. -We define an "expansion" of an object as an object where a field in an object that is meant to represent the `hash_tree_root` of another object is replaced by the object. Note that defining expansions is not a consensus-layer-change; it is merely a "re-interpretation" of the object. Particularly, the `hash_tree_root` of an expansion of an object is identical to that of the original object, and we can define expansions where, given a complete history, it is always possible to compute the expansion of any object in the history. The opposite of an expansion is a "summary" (e.g. `BeaconBlockHeader` is a summary of `BeaconBlock`). 
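A minimal sketch of this summary relationship, assuming the phase 0 pyspec containers and `hash_tree_root` helper (an illustration, not spec text):

```python
def summarize(block: BeaconBlock) -> BeaconBlockHeader:
    # Replace the block body with its hash tree root; Merkleization then
    # yields the same root for the summary as for the full block.
    return BeaconBlockHeader(
        slot=block.slot,
        parent_root=block.parent_root,
        state_root=block.state_root,
        body_root=hash_tree_root(block.body),
        signature=block.signature,
    )

# hash_tree_root(summarize(block)) == hash_tree_root(block)
```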
+## Custom types -We define two expansions: +We define the following Python custom types for type hinting and readability: -* `ExtendedBeaconState`, which is identical to a `BeaconState` except `compact_committees_roots: List[Bytes32]` is replaced by `active_indices: List[List[ValidatorIndex]]`, where `BeaconState.compact_committees_roots[i] = hash_tree_root(ExtendedBeaconState.active_indices[i])`. -* `ExtendedBeaconBlock`, which is identical to a `BeaconBlock` except `state_root` is replaced with the corresponding `state: ExtendedBeaconState`. +| Name | SSZ equivalent | Description | +| - | - | - | +| `CompactValidator` | `uint64` | compact representation of a validator for light clients | -### `get_active_validator_indices` +## Constants -Note that there is now a new way to compute `get_active_validator_indices`: +| Name | Value | +| - | - | +| `BEACON_CHAIN_ROOT_IN_SHARD_BLOCK_HEADER_DEPTH` | `4` | +| `BEACON_CHAIN_ROOT_IN_SHARD_BLOCK_HEADER_INDEX` | **TBD** | +| `PERSISTENT_COMMITTEE_ROOT_IN_BEACON_STATE_DEPTH` | `5` | +| `PERSISTENT_COMMITTEE_ROOT_IN_BEACON_STATE_INDEX` | **TBD** | + +## Containers + +### `LightClientUpdate` ```python -def get_active_validator_indices(state: ExtendedBeaconState, epoch: Epoch) -> List[ValidatorIndex]: - return state.active_indices[epoch % EPOCHS_PER_HISTORICAL_VECTOR] +class LightClientUpdate(container): + # Shard block root (and authenticating signature data) + shard_block_root: Hash + fork_version: Version + aggregation_bits: Bitlist[MAX_VALIDATORS_PER_COMMITTEE] + signature: BLSSignature + # Updated beacon header (and authenticating branch) + header: BeaconBlockHeader + header_branch: Vector[Hash, BEACON_CHAIN_ROOT_IN_SHARD_BLOCK_HEADER_DEPTH] + # Updated persistent committee (and authenticating branch) + committee: CompactCommittee + committee_branch: Vector[Hash, PERSISTENT_COMMITTEE_ROOT_IN_BEACON_STATE_DEPTH + log_2(SHARD_COUNT)] ``` -Note that it takes `state` instead of `state.validators` as an argument. This does not affect its use in `get_shuffled_committee`, because `get_shuffled_committee` has access to the full `state` as one of its arguments. +## Helpers - -### `MerklePartial` - -A `MerklePartial(f, *args)` is an object that contains a minimal Merkle proof needed to compute `f(*args)`. A `MerklePartial` can be used in place of a regular SSZ object, though a computation would return an error if it attempts to access part of the object that is not contained in the proof. 
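A rough sketch of the `MerklePartial` behavior described above (the class below is a hypothetical illustration, not the spec's definition):

```python
from typing import Dict


class MerklePartialView:
    """
    Hold only the chunks covered by a Merkle proof, keyed by generalized index.
    Accessing anything outside the proof fails instead of returning wrong data.
    """
    def __init__(self, chunks: Dict[int, bytes]) -> None:
        self.chunks = chunks

    def get_chunk(self, generalized_index: int) -> bytes:
        if generalized_index not in self.chunks:
            raise KeyError("chunk not covered by this partial")
        return self.chunks[generalized_index]
```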
- -### `PeriodData` +### `LightClientMemory` ```python -{ - 'validator_count': 'uint64', - 'seed': 'bytes32', - 'committee': [Validator], -} +@dataclass +class LightClientMemory(object): + shard: Shard # Randomly initialized and retained forever + header: BeaconBlockHeader # Beacon header which is not expected to revert + # Persistent committees corresponding to the beacon header + previous_committee: CompactCommittee + current_committee: CompactCommittee + next_committee: CompactCommittee ``` -### `get_earlier_start_epoch` +### `unpack_compact_validator` ```python -def get_earlier_start_epoch(slot: Slot) -> int: - return slot - slot % PERSISTENT_COMMITTEE_PERIOD - PERSISTENT_COMMITTEE_PERIOD * 2 -``` - -### `get_later_start_epoch` - -```python -def get_later_start_epoch(slot: Slot) -> int: - return slot - slot % PERSISTENT_COMMITTEE_PERIOD - PERSISTENT_COMMITTEE_PERIOD -``` - -### `get_period_data` - -```python -def get_period_data(block: ExtendedBeaconBlock, shard_id: Shard, later: bool) -> PeriodData: - period_start = get_later_start_epoch(header.slot) if later else get_earlier_start_epoch(header.slot) - validator_count = len(get_active_validator_indices(state, period_start)) - committee_count = validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE) + 1 - indices = get_period_committee(block.state, shard_id, period_start, 0, committee_count) - return PeriodData( - validator_count, - get_seed(block.state, period_start), - [block.state.validators[i] for i in indices], +def unpack_compact_validator(compact_validator: CompactValidator) -> Tuple[ValidatorIndex, bool, uint64]: + """ + Return the index, slashed, effective_balance // EFFECTIVE_BALANCE_INCREMENT of ``compact_validator``. + """ + return ( + ValidatorIndex(compact_validator >> 16), + (compact_validator >> 15) % 2, + uint64(compact_validator & (2**15 - 1)), ) ``` -### Light client state - -A light client will keep track of: - -* A random `shard_id` in `[0...SHARD_COUNT-1]` (selected once and retained forever) -* A block header that they consider to be finalized (`finalized_header`) and do not expect to revert. -* `later_period_data = get_period_data(finalized_header, shard_id, later=True)` -* `earlier_period_data = get_period_data(finalized_header, shard_id, later=False)` - -We use the struct `ValidatorMemory` to keep track of these variables. - -### Updating the shuffled committee - -If a client's `validator_memory.finalized_header` changes so that `header.slot // PERSISTENT_COMMITTEE_PERIOD` increases, then the client can ask the network for a `new_committee_proof = MerklePartial(get_period_data, validator_memory.finalized_header, shard_id, later=True)`. It can then compute: +### `get_persistent_committee_pubkeys_and_balances` ```python -earlier_period_data = later_period_data -later_period_data = get_period_data(new_committee_proof, finalized_header, shard_id, later=True) +def get_persistent_committee_pubkeys_and_balances(memory: LightClientMemory, + epoch: Epoch) -> Tuple[Sequence[BLSPubkey], Sequence[uint64]]: + """ + Return pubkeys and balances for the persistent committee at ``epoch``. 
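+
+    Committee rotation is gradual: the validator with ``index`` switches from the
+    earlier committee to the later one once ``epoch % EPOCHS_PER_SHARD_PERIOD``
+    reaches ``index % EPOCHS_PER_SHARD_PERIOD``, so at any epoch the returned
+    committee mixes members of both periods (hence the two loops below).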
+ """ + current_period = compute_epoch_of_slot(memory.header.slot) // EPOCHS_PER_SHARD_PERIOD + next_period = epoch // EPOCHS_PER_SHARD_PERIOD + assert next_period in (current_period, current_period + 1) + if next_period == current_period: + earlier_committee, later_committee = memory.previous_committee, memory.current_committee + else: + earlier_committee, later_committee = memory.current_committee, memory.next_committee + + pubkeys = [] + balances = [] + for pubkey, compact_validator in zip(earlier_committee.pubkeys, earlier_committee.compact_validators): + index, slashed, balance = unpack_compact_validator(compact_validator) + if epoch % EPOCHS_PER_SHARD_PERIOD < index % EPOCHS_PER_SHARD_PERIOD: + pubkeys.append(pubkey) + balances.append(balance) + for pubkey, compact_validator in zip(later_committee.pubkeys, later_committee.compact_validators): + index, slashed, balance = unpack_compact_validator(compact_validator) + if epoch % EPOCHS_PER_SHARD_PERIOD >= index % EPOCHS_PER_SHARD_PERIOD: + pubkeys.append(pubkey) + balances.append(balance) + return pubkeys, balances ``` -The maximum size of a proof is `128 * ((22-7) * 32 + 110) = 75520` bytes for validator records and `(22-7) * 32 + 128 * 8 = 1504` for the active index proof (much smaller because the relevant active indices are all beside each other in the Merkle tree). This needs to be done once per `PERSISTENT_COMMITTEE_PERIOD` epochs (2048 epochs / 9 days), or ~38 bytes per epoch. +## Light client state updates -## Computing the current committee - -Here is a helper to compute the committee at a slot given the maximal earlier and later committees: +The state of a light client is stored in a `memory` object of type `LightClientMemory`. To advance its state a light client requests an `update` object of type `LightClientUpdate` from the network by sending a request containing `(memory.shard, memory.header.slot, slot_range_end)` and calls `update_memory(memory, update)`. 
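For orientation, the request/response flow can be driven by a loop such as the following (a sketch only; `request_light_client_update` is a hypothetical network helper, not part of this specification, and the verification itself is `update_memory` below):

```python
def sync(memory: LightClientMemory, slot_range_end: Slot) -> None:
    # Fetch the best available update for our shard covering the requested range,
    # then apply it; update_memory asserts if the update is invalid
    update = request_light_client_update(  # hypothetical network call
        memory.shard, memory.header.slot, slot_range_end
    )
    update_memory(memory, update)
```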
```python
-def compute_committee(header: BeaconBlockHeader,
-                      validator_memory: ValidatorMemory) -> List[ValidatorIndex]:
-    earlier_validator_count = validator_memory.earlier_period_data.validator_count
-    later_validator_count = validator_memory.later_period_data.validator_count
-    maximal_earlier_committee = validator_memory.earlier_period_data.committee
-    maximal_later_committee = validator_memory.later_period_data.committee
-    earlier_start_epoch = get_earlier_start_epoch(header.slot)
-    later_start_epoch = get_later_start_epoch(header.slot)
-    epoch = compute_epoch_of_slot(header.slot)
+def update_memory(memory: LightClientMemory, update: LightClientUpdate) -> None:
+    # Verify the update does not skip a period
+    current_period = compute_epoch_of_slot(memory.header.slot) // EPOCHS_PER_SHARD_PERIOD
+    next_epoch = compute_epoch_of_shard_slot(update.header.slot)
+    next_period = next_epoch // EPOCHS_PER_SHARD_PERIOD
+    assert next_period in (current_period, current_period + 1)

-    committee_count = max(
-        earlier_validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE),
-        later_validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE),
-    ) + 1
+    # Verify update header against shard block root and header branch
+    assert is_valid_merkle_branch(
+        leaf=hash_tree_root(update.header),
+        branch=update.header_branch,
+        depth=BEACON_CHAIN_ROOT_IN_SHARD_BLOCK_HEADER_DEPTH,
+        index=BEACON_CHAIN_ROOT_IN_SHARD_BLOCK_HEADER_INDEX,
+        root=update.shard_block_root,
+    )

-    def get_offset(count: int, end: bool) -> int:
-        return get_split_offset(
-            count,
-            SHARD_COUNT * committee_count,
-            validator_memory.shard_id * committee_count + (1 if end else 0),
-        )
+    # Verify persistent committee votes pass 2/3 threshold
+    pubkeys, balances = get_persistent_committee_pubkeys_and_balances(memory, next_epoch)
+    # Bit i of aggregation_bits corresponds to the i-th committee member
+    voting_balance = sum(balance for i, balance in enumerate(balances) if update.aggregation_bits[i])
+    assert 3 * voting_balance > 2 * sum(balances)

-    actual_earlier_committee = maximal_earlier_committee[
-        0:get_offset(earlier_validator_count, True) - get_offset(earlier_validator_count, False)
-    ]
-    actual_later_committee = maximal_later_committee[
-        0:get_offset(later_validator_count, True) - get_offset(later_validator_count, False)
-    ]
-    def get_switchover_epoch(index):
-        return (
-            bytes_to_int(hash(validator_memory.earlier_period_data.seed + int_to_bytes(index, length=3))[0:8]) %
-            PERSISTENT_COMMITTEE_PERIOD
-        )
-
-    # Take not-yet-cycled-out validators from earlier committee and already-cycled-in validators from
-    # later committee; return a sorted list of the union of the two, deduplicated
-    return sorted(list(set(
-        [i for i in actual_earlier_committee if epoch % PERSISTENT_COMMITTEE_PERIOD < get_switchover_epoch(i)]
-        + [i for i in actual_later_committee if epoch % PERSISTENT_COMMITTEE_PERIOD >= get_switchover_epoch(i)]
-    )))
-```
-
-Note that this method makes use of the fact that the committee for any given shard always starts and ends at the same validator index independently of the committee count (this is because the validator set is split into `SHARD_COUNT * committee_count` slices, but the first slice of shard `i` starts at the multiple `committee_count * i`, so the start of the slice is `n * committee_count * i // (SHARD_COUNT * committee_count) = n * i // SHARD_COUNT`, using the slightly nontrivial algebraic identity `(x * a) // (a * b) == x // b`).
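The identity is easy to spot-check numerically (a quick sketch; it holds for all positive integers because `(x * a) / (a * b)` and `x / b` are equal as exact fractions, so their floors agree):

```python
# Spot-check (x * a) // (a * b) == x // b over small positive integers
for x in range(1, 100):
    for a in range(1, 12):
        for b in range(1, 12):
            assert (x * a) // (a * b) == x // b
```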
-## Verifying blocks
-
-If a client wants to update its `finalized_header` it asks the network for a `BlockValidityProof`, which is simply:
-
-```python
-{
-    'header': BeaconBlockHeader,
-    'shard_aggregate_signature': BLSSignature,
-    'shard_bits': Bitlist[PLACEHOLDER],
-    'shard_parent_block': ShardBlock,
-}
-```
-
-The verification procedure is as follows:
-
-```python
-def verify_block_validity_proof(proof: BlockValidityProof, validator_memory: ValidatorMemory) -> bool:
-    assert proof.shard_parent_block.beacon_chain_root == hash_tree_root(proof.header)
-    committee = compute_committee(proof.header, validator_memory)
-    # Verify that we have >=50% support
-    support_balance = sum([v.effective_balance for i, v in enumerate(committee) if proof.shard_bits[i]])
-    total_balance = sum([v.effective_balance for i, v in enumerate(committee)])
-    assert support_balance * 2 > total_balance
-    # Verify shard attestations
-    group_public_key = bls_aggregate_pubkeys([
-        v.pubkey for v, index in enumerate(committee)
-        if proof.shard_bits[index]
-    ])
-    assert bls_verify(
-        pubkey=group_public_key,
-        message_hash=hash_tree_root(shard_parent_block),
-        signature=proof.shard_aggregate_signature,
-        domain=get_domain(state, compute_epoch_of_slot(shard_block.slot), DOMAIN_SHARD_ATTESTER),
-    )
+    # Aggregate the pubkeys of the members whose aggregation bit is set
+    pubkey = bls_aggregate_pubkeys([pubkey for i, pubkey in enumerate(pubkeys) if update.aggregation_bits[i]])
+    domain = compute_domain(DOMAIN_SHARD_ATTESTER, update.fork_version)
+    assert bls_verify(pubkey, update.shard_block_root, update.signature, domain)
+
+    # Update persistent committees if entering a new period
+    if next_period == current_period + 1:
+        assert is_valid_merkle_branch(
+            leaf=hash_tree_root(update.committee),
+            branch=update.committee_branch,
+            depth=PERSISTENT_COMMITTEE_ROOT_IN_BEACON_STATE_DEPTH + log_2(SHARD_COUNT),
+            index=(PERSISTENT_COMMITTEE_ROOT_IN_BEACON_STATE_INDEX << log_2(SHARD_COUNT)) + memory.shard,
+            root=hash_tree_root(update.header),
+        )
+        memory.previous_committee = memory.current_committee
+        memory.current_committee = memory.next_committee
+        memory.next_committee = update.committee
+
+    # Update header
+    memory.header = update.header
 ```
 
-The size of this proof is only 200 (header) + 96 (signature) + 16 (bits) + 352 (shard block) = 664 bytes. It can be reduced further by replacing `ShardBlock` with `MerklePartial(lambda x: x.beacon_chain_root, ShardBlock)`, which would cut off ~220 bytes.
+## Data overhead
+
+Once every `EPOCHS_PER_SHARD_PERIOD` epochs (~27 hours) a light client downloads a `LightClientUpdate` object:
+
+* `shard_block_root`: 32 bytes
+* `fork_version`: 4 bytes
+* `aggregation_bits`: 16 bytes
+* `signature`: 96 bytes
+* `header`: 8 + 32 + 32 + 32 + 96 = 200 bytes
+* `header_branch`: 4 * 32 = 128 bytes
+* `committee`: 128 * (48 + 8) = 7,168 bytes
+* `committee_branch`: (5 + 10) * 32 = 480 bytes
+
+The total overhead is 8,124 bytes, or ~0.083 bytes per second. The Bitcoin SPV equivalent is 80 bytes per ~560 seconds, or ~0.143 bytes per second. Various compression optimisations (similar to [these](https://github.com/RCasatta/compressedheaders)) are possible.
+
+A light client can choose to update the header (without updating the committee) more frequently than once every `EPOCHS_PER_SHARD_PERIOD` epochs at a cost of 32 + 4 + 16 + 96 + 200 + 128 = 476 bytes per update.
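The byte counts above can be reproduced mechanically (a sketch; the ~27 hours assumes `EPOCHS_PER_SHARD_PERIOD = 256` together with 64-slot epochs and 6-second slots, the values of this era, all three labeled as assumptions below):

```python
SECONDS_PER_SLOT = 6           # assumption
SLOTS_PER_EPOCH = 64           # assumption
EPOCHS_PER_SHARD_PERIOD = 256  # assumption

update_size = sum([
    32,                      # shard_block_root
    4,                       # fork_version
    16,                      # aggregation_bits
    96,                      # signature
    8 + 32 + 32 + 32 + 96,   # header
    4 * 32,                  # header_branch
    128 * (48 + 8),          # committee: 128 pubkeys plus compact validators
    (5 + 10) * 32,           # committee_branch
])
period_seconds = EPOCHS_PER_SHARD_PERIOD * SLOTS_PER_EPOCH * SECONDS_PER_SLOT
assert update_size == 8124
print(round(update_size / period_seconds, 3))  # ~0.083 bytes per second
```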
From 7409b5ae829aa5cfa8be58e4fc3adaa61c69c39a Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Aug 2019 20:57:37 +0800 Subject: [PATCH 129/130] Add basic `test_verify_merkle_proof` and `test_verify_merkle_multiproof` tests --- .../test/merkle_proofs/test_merkle_proofs.py | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py b/test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py index 5e2c4046b..91c861de3 100644 --- a/test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py +++ b/test_libs/pyspec/eth2spec/test/merkle_proofs/test_merkle_proofs.py @@ -96,3 +96,53 @@ def test_get_generalized_index(spec, state): yield 'typ', typ yield 'path', path yield 'generalized_index', generalized_index + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_verify_merkle_proof(spec, state): + h = spec.hash + a = b'\x11' * 32 + b = b'\x22' * 32 + c = b'\x33' * 32 + d = b'\x44' * 32 + root = h(h(a + b) + h(c + d)) + leaf = a + generalized_index = 4 + proof = [b, h(c + d)] + + is_valid = spec.verify_merkle_proof( + leaf=leaf, + proof=proof, + index=generalized_index, + root=root, + ) + assert is_valid + + yield 'proof', proof + yield 'is_valid', is_valid + + +@with_all_phases_except(['phase0']) +@spec_state_test +def test_verify_merkle_multiproof(spec, state): + h = spec.hash + a = b'\x11' * 32 + b = b'\x22' * 32 + c = b'\x33' * 32 + d = b'\x44' * 32 + root = h(h(a + b) + h(c + d)) + leaves = [a, d] + generalized_indices = [4, 7] + proof = [c, b] # helper_indices = [6, 5] + + is_valid = spec.verify_merkle_multiproof( + leaves=leaves, + proof=proof, + indices=generalized_indices, + root=root, + ) + assert is_valid + + yield 'proof', proof + yield 'is_valid', is_valid From bbaa238742a93e2aa0524baaf6fd9049d2943d59 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 23 Aug 2019 20:16:46 +0800 Subject: [PATCH 130/130] Fix the definition of `GeneralizedIndex` --- scripts/build_spec.py | 3 ++- specs/light_client/merkle_proofs.md | 16 +++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 0a5171e8f..83f9a2145 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -37,7 +37,7 @@ from eth2spec.utils.bls import ( from eth2spec.utils.hash_function import hash ''' PHASE1_IMPORTS = '''from typing import ( - Any, Dict, Optional, Set, Sequence, MutableSequence, Tuple, Union, + Any, Dict, Optional, Set, Sequence, MutableSequence, NewType, Tuple, Union, ) from math import ( log2, @@ -70,6 +70,7 @@ from eth2spec.utils.hash_function import hash SSZVariableName = str +GeneralizedIndex = NewType('GeneralizedIndex', int) ''' SUNDRY_CONSTANTS_FUNCTIONS = ''' def ceillog2(x: uint64) -> int: diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index d7f0ab382..ce7dc647c 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -7,7 +7,6 @@ - [Merkle proof formats](#merkle-proof-formats) - [Table of contents](#table-of-contents) - - [Custom types](#custom-types) - [Helper functions](#helper-functions) - [Generalized Merkle tree index](#generalized-merkle-tree-index) - [SSZ object to index](#ssz-object-to-index) @@ -22,13 +21,6 @@ -## Custom types - -We define the following Python custom types for type hinting and readability: - -| - | - | - | -| `GeneralizedIndex` | `uint64` | the index of a node in a binary Merkle tree | - ## Helper functions 
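Generalized indices number the nodes of a binary Merkle tree root-down: the root is `1` and node `k` has children `2 * k` (left) and `2 * k + 1` (right). The helper `generalized_index_child`, which appears in the diff below, encodes exactly this rule (a plain-Python sketch):

```python
def generalized_index_child(index: int, right_side: bool) -> int:
    # The left child doubles the index; the right child additionally sets the low bit
    return index * 2 + int(right_side)

# Four-leaf tree: root = 1, h(a + b) = 2, h(c + d) = 3, leaves a..d sit at 4..7
assert generalized_index_child(1, False) == 2
assert generalized_index_child(3, True) == 7
```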
```python @@ -75,6 +67,8 @@ def merkle_tree(leaves: Sequence[Hash]) -> Sequence[Hash]: return o ``` +We define a custom type `GeneralizedIndex` as a Python integer type in this document. It can be represented as a Bitvector/Bitlist object as well. + We will define Merkle proofs in terms of generalized indices. ## SSZ object to index @@ -175,7 +169,7 @@ def get_generalized_index(typ: SSZType, path: Sequence[Union[int, SSZVariableNam else: pos, _, _ = get_item_position(typ, p) base_index = (GeneralizedIndex(2) if issubclass(typ, (List, Bytes)) else GeneralizedIndex(1)) - root = root * base_index * get_next_power_of_two(chunk_count(typ)) + pos + root = GeneralizedIndex(root * base_index * get_next_power_of_two(chunk_count(typ)) + pos) typ = get_elem_type(typ, p) return root ``` @@ -280,8 +274,8 @@ def get_helper_indices(indices: Sequence[GeneralizedIndex]) -> Sequence[Generali return sorted([ x for x in all_indices if ( not ( - generalized_index_child(x, GeneralizedIndex(0)) in all_indices and - generalized_index_child(x, GeneralizedIndex(1)) in all_indices + generalized_index_child(x, False) in all_indices and + generalized_index_child(x, True) in all_indices ) and not (x in indices) ) ], reverse=True)
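
# Example: for leaves at generalized indices [4, 7] in a four-leaf tree,
# all_indices = {2, 3, 4, 5, 6, 7}. Nodes 2 and 3 are dropped because both of
# their children are present (so they can be recomputed), and 4 and 7 are the
# requested leaves themselves, leaving helper indices [6, 5] in descending
# order. This matches `test_verify_merkle_multiproof` above, where proof = [c, b].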