diff --git a/scripts/build_spec.py b/scripts/build_spec.py index 99c5cd69d..0b1512111 100644 --- a/scripts/build_spec.py +++ b/scripts/build_spec.py @@ -25,7 +25,7 @@ from eth2spec.utils.ssz.ssz_impl import ( signing_root, ) from eth2spec.utils.ssz.ssz_typing import ( - Bit, Bool, Container, List, Vector, Bytes, uint64, + bit, boolean, Container, List, Vector, Bytes, uint64, Bytes4, Bytes32, Bytes48, Bytes96, ) from eth2spec.utils.bls import ( @@ -52,7 +52,7 @@ from eth2spec.utils.ssz.ssz_impl import ( is_empty, ) from eth2spec.utils.ssz.ssz_typing import ( - Bit, Bool, Container, List, Vector, Bytes, uint64, + bit, boolean, Container, List, Vector, Bytes, uint64, Bytes4, Bytes32, Bytes48, Bytes96, ) from eth2spec.utils.bls import ( @@ -174,7 +174,7 @@ def combine_constants(old_constants: Dict[str, str], new_constants: Dict[str, st ignored_dependencies = [ - 'Bit', 'Bool', 'Vector', 'List', 'Container', 'Hash', 'BLSPubkey', 'BLSSignature', 'Bytes', 'BytesN' + 'bit', 'boolean', 'Vector', 'List', 'Container', 'Hash', 'BLSPubkey', 'BLSSignature', 'Bytes', 'BytesN' 'Bytes4', 'Bytes32', 'Bytes48', 'Bytes96', 'uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'uint256', 'bytes' # to be removed after updating spec doc diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 56a6fd06a..7b9aeee4b 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -297,7 +297,7 @@ class Validator(Container): pubkey: BLSPubkey withdrawal_credentials: Hash # Commitment to pubkey for withdrawals and transfers effective_balance: Gwei # Balance at stake - slashed: Bool + slashed: boolean # Status epochs activation_eligibility_epoch: Epoch # When criteria for activation were met activation_epoch: Epoch @@ -337,7 +337,7 @@ class AttestationData(Container): ```python class AttestationDataAndCustodyBit(Container): data: AttestationData - custody_bit: Bit # Challengeable bit (SSZ-bool, 1 byte) for the custody of crosslink data + custody_bit: bit # 
Challengeable bit (SSZ-bool, 1 byte) for the custody of crosslink data ``` #### `IndexedAttestation` diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 3318fe45b..53c5649ed 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -15,9 +15,9 @@ - [Default values](#default-values) - [Illegal types](#illegal-types) - [Serialization](#serialization) - - [`"uintN"`](#uintn) - - [`"bool"`](#bool) - - [`"null`](#null) + - [`uintN`](#uintn) + - [`boolean`](#boolean) + - [`null`](#null) - [Vectors, containers, lists, unions](#vectors-containers-lists-unions) - [Deserialization](#deserialization) - [Merkleization](#merkleization) @@ -37,36 +37,45 @@ ## Typing ### Basic types -* `"uintN"`: `N`-bit unsigned integer (where `N in [8, 16, 32, 64, 128, 256]`) -* `"bool"`: `True` or `False` +* `uintN`: `N`-bit unsigned integer (where `N in [8, 16, 32, 64, 128, 256]`) +* `boolean`: `True` or `False` ### Composite types * **container**: ordered heterogeneous collection of values - * key-pair curly bracket notation `{}`, e.g. `{"foo": "uint64", "bar": "bool"}` + * python dataclass notation with key-type pairs, e.g. +```python +class ContainerExample(Container): + foo: uint64 + bar: boolean +``` * **vector**: ordered fixed-length homogeneous collection of values - * angle bracket notation `[type, N]`, e.g. `["uint64", N]` -* **list**: ordered variable-length homogeneous collection of values - * angle bracket notation `[type]`, e.g. `["uint64"]` + * notation `Vector[type, N]`, e.g. `Vector[uint64, N]` +* **list**: ordered variable-length homogeneous collection of values, with maximum length `N` + * notation `List[type, N]`, e.g. `List[uint64, N]` * **union**: union type containing one of the given subtypes - * round bracket notation `(type_1, type_2, ...)`, e.g. `("null", "uint64")` + * notation `Union[type_1, type_2, ...]`, e.g. 
`Union[null, uint64]` +* **Bitvector**: a fixed-length list of `boolean` values + * notation `Bitvector[N]` +* **Bitlist**: a variable-length list of `boolean` values with maximum length `N` + * notation `Bitlist[N]` ### Variable-size and fixed-size -We recursively define "variable-size" types to be lists and unions and all types that contain a variable-size type. All other types are said to be "fixed-size". +We recursively define "variable-size" types to be lists, unions, `Bitlist` and all types that contain a variable-size type. All other types are said to be "fixed-size". ### Aliases For convenience we alias: -* `"byte"` to `"uint8"` (this is a basic type) -* `"bytes"` to `["byte"]` (this is *not* a basic type) -* `"bytesN"` to `["byte", N]` (this is *not* a basic type) -* `"null"`: `{}`, i.e. the empty container +* `bit` to `boolean` +* `byte` to `uint8` (this is a basic type) +* `BytesN` to `Vector[byte, N]` (this is *not* a basic type) +* `null`: `{}`, i.e. the empty container ### Default values -The default value of a type upon initialization is recursively defined using `0` for `"uintN"`, `False` for `"bool"`, and `[]` for lists. Unions default to the first type in the union (with type index zero), which is `"null"` if present in the union. +The default value of a type upon initialization is recursively defined using `0` for `uintN`, `False` for `boolean` and the elements of `Bitvector`, and `[]` for lists and `Bitlist`. Unions default to the first type in the union (with type index zero), which is `null` if present in the union. #### `is_empty` @@ -74,34 +83,50 @@ An SSZ object is called empty (and thus, `is_empty(object)` returns true) if it ### Illegal types -Empty vector types (i.e. `[subtype, 0]` for some `subtype`) are not legal. The `"null"` type is only legal as the first type in a union subtype (i.e. with type index zero). +Empty vector types (i.e. `Vector[subtype, 0]` for some `subtype`) are not legal. 
The `null` type is only legal as the first type in a union subtype (i.e. with type index zero). ## Serialization -We recursively define the `serialize` function which consumes an object `value` (of the type specified) and returns a bytestring of type `"bytes"`. +We recursively define the `serialize` function which consumes an object `value` (of the type specified) and returns a bytestring of type `bytes`. *Note*: In the function definitions below (`serialize`, `hash_tree_root`, `signing_root`, `is_variable_size`, etc.) objects implicitly carry their type. -### `"uintN"` +### `uintN` ```python assert N in [8, 16, 32, 64, 128, 256] return value.to_bytes(N // 8, "little") ``` -### `"bool"` +### `boolean` ```python assert value in (True, False) return b"\x01" if value is True else b"\x00" ``` -### `"null"` +### `null` ```python return b"" ``` +### `Bitvector[N]` + +```python +as_integer = sum([value[i] << i for i in range(len(value))]) +return as_integer.to_bytes((N + 7) // 8, "little") +``` + +### `Bitlist[N]` + +Note that from the offset coding, the length (in bytes) of the bitlist is known. An additional leading `1` bit is added so that the length in bits will also be known. + +```python +as_integer = (1 << len(value)) + sum([value[i] << i for i in range(len(value))]) +return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little") +``` + ### Vectors, containers, lists, unions ```python @@ -142,17 +167,33 @@ We first define helper functions: * `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks. * `merkleize`: Given ordered `BYTES_PER_CHUNK`-byte chunks, if necessary append zero chunks so that the number of chunks is a power of two, Merkleize the chunks, and return the root. Note that `merkleize` on a single chunk is simply that chunk, i.e. the identity when the number of chunks is one. 
+* `pad`: Given a list `l` and a length `N`, append `N - len(l)` empty objects to the end of the list (the type of the empty object is implicit in the list type). * `mix_in_length`: Given a Merkle root `root` and a length `length` (`"uint256"` little-endian serialization) return `hash(root + length)`. * `mix_in_type`: Given a Merkle root `root` and a type_index `type_index` (`"uint256"` little-endian serialization) return `hash(root + type_index)`. We now define Merkleization `hash_tree_root(value)` of an object `value` recursively: * `merkleize(pack(value))` if `value` is a basic object or a vector of basic objects -* `mix_in_length(merkleize(pack(value)), len(value))` if `value` is a list of basic objects +* `mix_in_length(merkleize(pack(pad(value, N))), len(value))` if `value` is a list of basic objects * `merkleize([hash_tree_root(element) for element in value])` if `value` is a vector of composite objects or a container -* `mix_in_length(merkleize([hash_tree_root(element) for element in value]), len(value))` if `value` is a list of composite objects +* `mix_in_length(merkleize([hash_tree_root(element) for element in pad(value, N)]), len(value))` if `value` is a list of composite objects * `mix_in_type(merkleize(value.value), value.type_index)` if `value` is of union type +### Merkleization of `Bitvector[N]` + +```python +as_integer = sum([value[i] << i for i in range(len(value))]) +return merkleize(as_integer.to_bytes((N + 7) // 8, "little")) +``` + +### Merkleization of `Bitlist[N]` + +```python +as_integer = sum([value[i] << i for i in range(len(value))]) +return mix_in_length(merkleize(as_integer.to_bytes((N + 7) // 8, "little")), len(value)) +``` + + ## Self-signed containers Let `value` be a self-signed container object. The convention is that the signature (e.g. a `"bytes96"` BLS12-381 signature) be the last field of `value`. 
Further, the signed message for `value` is `signing_root(value) = hash_tree_root(truncate_last(value))` where `truncate_last` truncates the last element of `value`. diff --git a/test_libs/pyspec/eth2spec/debug/decode.py b/test_libs/pyspec/eth2spec/debug/decode.py index c0b53b0ef..c0b977ab3 100644 --- a/test_libs/pyspec/eth2spec/debug/decode.py +++ b/test_libs/pyspec/eth2spec/debug/decode.py @@ -1,13 +1,13 @@ from typing import Any from eth2spec.utils.ssz.ssz_impl import hash_tree_root from eth2spec.utils.ssz.ssz_typing import ( - SSZType, SSZValue, uint, Container, Bytes, List, Bool, + SSZType, SSZValue, uint, Container, Bytes, List, boolean, Vector, BytesN ) def decode(data: Any, typ: SSZType) -> SSZValue: - if issubclass(typ, (uint, Bool)): + if issubclass(typ, (uint, boolean)): return typ(data) elif issubclass(typ, (List, Vector)): return typ(decode(element, typ.elem_type) for element in data) diff --git a/test_libs/pyspec/eth2spec/debug/encode.py b/test_libs/pyspec/eth2spec/debug/encode.py index 02814e441..670f580b2 100644 --- a/test_libs/pyspec/eth2spec/debug/encode.py +++ b/test_libs/pyspec/eth2spec/debug/encode.py @@ -1,6 +1,6 @@ from eth2spec.utils.ssz.ssz_impl import hash_tree_root from eth2spec.utils.ssz.ssz_typing import ( - SSZValue, uint, Container, Bool + SSZValue, uint, Container, boolean ) @@ -10,7 +10,7 @@ def encode(value: SSZValue, include_hash_tree_roots=False): if value.type().byte_len > 8: return str(int(value)) return int(value) - elif isinstance(value, Bool): + elif isinstance(value, boolean): return value == 1 elif isinstance(value, list): # normal python lists, ssz-List, Vector return [encode(element, include_hash_tree_roots) for element in value] diff --git a/test_libs/pyspec/eth2spec/debug/random_value.py b/test_libs/pyspec/eth2spec/debug/random_value.py index c6efb722b..cdcba343a 100644 --- a/test_libs/pyspec/eth2spec/debug/random_value.py +++ b/test_libs/pyspec/eth2spec/debug/random_value.py @@ -2,7 +2,7 @@ from random import Random 
from enum import Enum from eth2spec.utils.ssz.ssz_typing import ( - SSZType, SSZValue, BasicValue, BasicType, uint, Container, Bytes, List, Bool, + SSZType, SSZValue, BasicValue, BasicType, uint, Container, Bytes, List, boolean, Vector, BytesN ) @@ -118,7 +118,7 @@ def get_random_bytes_list(rng: Random, length: int) -> bytes: def get_random_basic_value(rng: Random, typ: BasicType) -> BasicValue: - if issubclass(typ, Bool): + if issubclass(typ, boolean): return typ(rng.choice((True, False))) elif issubclass(typ, uint): assert typ.byte_len in UINT_BYTE_SIZES @@ -128,7 +128,7 @@ def get_random_basic_value(rng: Random, typ: BasicType) -> BasicValue: def get_min_basic_value(typ: BasicType) -> BasicValue: - if issubclass(typ, Bool): + if issubclass(typ, boolean): return typ(False) elif issubclass(typ, uint): assert typ.byte_len in UINT_BYTE_SIZES @@ -138,7 +138,7 @@ def get_min_basic_value(typ: BasicType) -> BasicValue: def get_max_basic_value(typ: BasicType) -> BasicValue: - if issubclass(typ, Bool): + if issubclass(typ, boolean): return typ(True) elif issubclass(typ, uint): assert typ.byte_len in UINT_BYTE_SIZES diff --git a/test_libs/pyspec/eth2spec/fuzzing/decoder.py b/test_libs/pyspec/eth2spec/fuzzing/decoder.py index e533ca5c2..130956235 100644 --- a/test_libs/pyspec/eth2spec/fuzzing/decoder.py +++ b/test_libs/pyspec/eth2spec/fuzzing/decoder.py @@ -19,7 +19,7 @@ def translate_typ(typ) -> ssz.BaseSedes: return ssz.Vector(translate_typ(typ.elem_type), typ.length) elif issubclass(typ, spec_ssz.List): return ssz.List(translate_typ(typ.elem_type)) - elif issubclass(typ, spec_ssz.Bool): + elif issubclass(typ, spec_ssz.boolean): return ssz.boolean elif issubclass(typ, spec_ssz.uint): if typ.byte_len == 1: @@ -64,7 +64,7 @@ def translate_value(value, typ): raise TypeError("invalid uint size") elif issubclass(typ, spec_ssz.List): return [translate_value(elem, typ.elem_type) for elem in value] - elif issubclass(typ, spec_ssz.Bool): + elif issubclass(typ, spec_ssz.boolean): 
return value elif issubclass(typ, spec_ssz.Vector): return typ(*(translate_value(elem, typ.elem_type) for elem in value)) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py index b9c7b6d38..a7f6f9da1 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py @@ -1,7 +1,8 @@ from ..merkle_minimal import merkleize_chunks from ..hash_function import hash from .ssz_typing import ( - SSZValue, SSZType, BasicValue, BasicType, Series, Elements, Bool, Container, List, Bytes, uint, + SSZValue, SSZType, BasicValue, BasicType, Series, Elements, boolean, Container, List, Bytes, + Bitlist, Bitvector, uint, ) # SSZ Serialization @@ -13,7 +14,7 @@ BYTES_PER_LENGTH_OFFSET = 4 def serialize_basic(value: SSZValue): if isinstance(value, uint): return value.to_bytes(value.type().byte_len, 'little') - elif isinstance(value, Bool): + elif isinstance(value, boolean): if value: return b'\x01' else: @@ -39,6 +40,12 @@ def is_empty(obj: SSZValue): def serialize(obj: SSZValue): if isinstance(obj, BasicValue): return serialize_basic(obj) + elif isinstance(obj, Bitvector): + as_integer = sum([obj[i] << i for i in range(len(obj))]) + return as_integer.to_bytes((len(obj) + 7) // 8, "little") + elif isinstance(obj, Bitlist): + as_integer = (1 << len(obj)) + sum([obj[i] << i for i in range(len(obj))]) + return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little") elif isinstance(obj, Series): return encode_series(obj) else: @@ -85,6 +92,12 @@ def encode_series(values: Series): def pack(values: Series): if isinstance(values, bytes): # Bytes and BytesN are already packed return values + elif isinstance(values, Bitvector): + as_integer = sum([values[i] << i for i in range(len(values))]) + return as_integer.to_bytes((values.length + 7) // 8, "little") + elif isinstance(values, Bitlist): + as_integer = (1 << len(values)) + sum([values[i] << i for i in range(len(values))]) + 
return as_integer.to_bytes((values.length + 7) // 8, "little") return b''.join([serialize_basic(value) for value in values]) @@ -134,7 +147,7 @@ def hash_tree_root(obj: SSZValue): else: raise Exception(f"Type not supported: {type(obj)}") - if isinstance(obj, (List, Bytes)): + if isinstance(obj, (List, Bytes, Bitlist)): return mix_in_length(merkleize_chunks(leaves, pad_to=chunk_count(obj.type())), len(obj)) else: return merkleize_chunks(leaves) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py index 58e66ca68..ea07359b2 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py @@ -31,7 +31,7 @@ class BasicValue(int, SSZValue, metaclass=BasicType): pass -class Bool(BasicValue): # can't subclass bool. +class boolean(BasicValue): # can't subclass bool. byte_len = 1 def __new__(cls, value: int): # int value, but can be any subclass of int (bool, Bit, Bool, etc...) @@ -48,7 +48,7 @@ class Bool(BasicValue): # can't subclass bool. # Alias for Bool -class Bit(Bool): +class bit(boolean): pass @@ -233,7 +233,7 @@ class ParamsMeta(SSZType): return f"{self.__name__}~{self.__class__.__name__}" def __repr__(self): - return self, self.__class__ + return f"{self.__name__}~{self.__class__.__name__}" def attr_from_params(self, p): # single key params are valid too. Wrap them in a tuple. 
@@ -280,11 +280,12 @@ class ElementsType(ParamsMeta): elem_type: SSZType length: int +class BitElementsType(ElementsType): + elem_type = boolean class Elements(ParamsBase, metaclass=ElementsType): pass - class BaseList(list, Elements): def __init__(self, *args): @@ -310,6 +311,10 @@ class BaseList(list, Elements): cls = self.__class__ return f"{cls.__name__}[{cls.elem_type.__name__}, {cls.length}]({', '.join(str(v) for v in self)})" + def __repr__(self): + cls = self.__class__ + return f"{cls.__name__}[{cls.elem_type.__name__}, {cls.length}]({', '.join(str(v) for v in self)})" + def __getitem__(self, k) -> SSZValue: if isinstance(k, int): # check if we are just doing a lookup, and not slicing if k < 0: @@ -337,6 +342,15 @@ class BaseList(list, Elements): # be explict about getting the last item, for the non-python readers, and negative-index safety return self[len(self) - 1] +class BaseBitfield(BaseList, metaclass=BitElementsType): + elem_type = bool + +class Bitlist(BaseBitfield): + pass + +class Bitvector(BaseBitfield): + pass + class List(BaseList): diff --git a/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_impl.py b/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_impl.py index 82fb4ec68..33badcf4a 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_impl.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_impl.py @@ -1,7 +1,7 @@ from typing import Iterable from .ssz_impl import serialize, hash_tree_root from .ssz_typing import ( - Bit, Bool, Container, List, Vector, Bytes, BytesN, + bit, boolean, Container, List, Vector, Bytes, BytesN, uint8, uint16, uint32, uint64, uint256, byte ) from ..hash_function import hash as bytes_hash @@ -74,10 +74,10 @@ def merge(a: str, branch: Iterable[str]) -> str: test_data = [ - ("bit F", Bit(False), "00", chunk("00")), - ("bit T", Bit(True), "01", chunk("01")), - ("bool F", Bool(False), "00", chunk("00")), - ("bool T", Bool(True), "01", chunk("01")), + ("bit F", bit(False), "00", chunk("00")), + ("bit T", bit(True), "01", 
chunk("01")), + ("boolean F", boolean(False), "00", chunk("00")), + ("boolean T", boolean(True), "01", chunk("01")), ("uint8 00", uint8(0x00), "00", chunk("00")), ("uint8 01", uint8(0x01), "01", chunk("01")), ("uint8 ab", uint8(0xab), "ab", chunk("ab")), diff --git a/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py b/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py index 2af742360..f746a29c9 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/test_ssz_typing.py @@ -1,6 +1,6 @@ from .ssz_typing import ( SSZValue, SSZType, BasicValue, BasicType, Series, ElementsType, - Elements, Bit, Bool, Container, List, Vector, Bytes, BytesN, + Elements, bit, boolean, Container, List, Vector, Bytes, BytesN, byte, uint, uint8, uint16, uint32, uint64, uint128, uint256, Bytes32, Bytes48 ) @@ -22,8 +22,8 @@ def test_subclasses(): assert issubclass(u, SSZValue) assert isinstance(u, SSZType) assert isinstance(u, BasicType) - assert issubclass(Bool, BasicValue) - assert isinstance(Bool, BasicType) + assert issubclass(boolean, BasicValue) + assert isinstance(boolean, BasicType) for c in [Container, List, Vector, Bytes, BytesN]: assert issubclass(c, Series) @@ -45,16 +45,16 @@ def test_basic_instances(): assert isinstance(v, BasicValue) assert isinstance(v, SSZValue) - assert isinstance(Bool(True), BasicValue) - assert isinstance(Bool(False), BasicValue) - assert isinstance(Bit(True), Bool) - assert isinstance(Bit(False), Bool) + assert isinstance(boolean(True), BasicValue) + assert isinstance(boolean(False), BasicValue) + assert isinstance(bit(True), boolean) + assert isinstance(bit(False), boolean) def test_basic_value_bounds(): max = { - Bool: 2 ** 1, - Bit: 2 ** 1, + boolean: 2 ** 1, + bit: 2 ** 1, uint8: 2 ** (8 * 1), byte: 2 ** (8 * 1), uint16: 2 ** (8 * 2),