Spec checker tool (#8722)

* Add specdocs static code analyzer

* docs pulling script

* update content pulling script

* add test

* better parsing of incoming docs

* update test

* implements analyzer

* separate tool

* remove analyzer code

* cleanup

* deep source fixes

* untrack raw specs files

* add back phase0 defs

* update spec texts

* re-arrange code

* updated spec list

* cleanup

* more comments and readme

* add merkle proofs specs

* add extra.md

* mark wrong length issue

* update readme

* update readme

* remove non-def snippets

* update comment

* check numrows

* ignore last empty line

Co-authored-by: Raul Jordan <raul@prysmaticlabs.com>
Author: Victor Farazdagi
Date: 2021-04-15 17:54:07 +03:00
Committed by: GitHub
Parent: 169cd78bbd
Commit: 3d3b9d1217

11 changed files with 2137 additions and 0 deletions

tools/specs-checker/BUILD.bazel

@@ -0,0 +1,28 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary")
load("@prysm//tools/go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"check.go",
"download.go",
"main.go",
],
embedsrcs = [
"data/specs/phase0/beacon-chain.md",
"data/specs/phase0/fork-choice.md",
"data/specs/phase0/validator.md",
"data/specs/phase0/weak-subjectivity.md",
"data/ssz/merkle-proofs.md",
"data/extra.md",
],
importpath = "github.com/prysmaticlabs/prysm/tools/specs-checker",
visibility = ["//visibility:public"],
deps = ["@com_github_urfave_cli_v2//:go_default_library"],
)
go_binary(
name = "specs-checker",
embed = [":go_default_library"],
visibility = ["//visibility:public"],
)

tools/specs-checker/README.md

@@ -0,0 +1,41 @@
# Specs checker tool
This simple tool downloads and parses the [ETH2 specs](https://github.com/ethereum/eth2.0-specs/tree/dev/specs),
so that we can later verify that our reference comments match the spec definitions precisely.
### Updating the reference specs
See `main.go` for a list of files to be downloaded, currently:
```golang
var specDirs = map[string][]string{
"specs/phase0": {
"beacon-chain.md",
"fork-choice.md",
"validator.md",
"weak-subjectivity.md",
},
"ssz": {
"merkle-proofs.md",
},
}
```
To download/update specs:
```bash
bazel run //tools/specs-checker download -- --dir=$PWD/tools/specs-checker/data
```
This will pull the files defined in `specDirs`, parse them (extracting the Python code snippets and discarding all other text),
and save them into the folder from which `bazel run //tools/specs-checker check` embeds them.
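
The resulting data layout (inferred from the `embedsrcs` list in `BUILD.bazel`) looks like this:

```
tools/specs-checker/data
├── extra.md
├── specs
│   └── phase0
│       ├── beacon-chain.md
│       ├── fork-choice.md
│       ├── validator.md
│       └── weak-subjectivity.md
└── ssz
    └── merkle-proofs.md
```
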
### Checking against the reference specs
To check whether the reference comments match the embedded Python specs:
```bash
bazel run //tools/specs-checker check -- --dir $PWD/beacon-chain
bazel run //tools/specs-checker check -- --dir $PWD/validator
bazel run //tools/specs-checker check -- --dir $PWD/shared
```
Or, to check the whole project:
```bash
bazel run //tools/specs-checker check -- --dir $PWD
```
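
### What a matching comment looks like

The checker scans Go comment groups for Python `def` snippets and compares them, line by line (ignoring leading whitespace), against the embedded reference definitions. A minimal sketch of a doc comment it would accept (the Go-side names here are hypothetical; the Python lines are the embedded `fork-choice.md` definition):

```golang
// CurrentSlot returns the current slot, based on the genesis and current time.
//
// Spec pseudocode definition:
//   def get_current_slot(store: Store) -> Slot:
//     return Slot(GENESIS_SLOT + get_slots_since_genesis(store))
func (s *Store) CurrentSlot() types.Slot {
	// ...
}
```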

tools/specs-checker/check.go

@@ -0,0 +1,176 @@
package main
import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"github.com/urfave/cli/v2"
)
// Regex to find Python's "def".
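// For example, in "def get_current_slot(store: Store) -> Slot:" the capture group yields "get_current_slot".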
var reg1 = regexp.MustCompile(`def\s(.*)\(.*`)
// checkNumRows defines whether the tool should also check that the spec comment is the last comment of the block,
// i.e. that it not only matches the reference snippet, but also has the same number of rows.
const checkNumRows = false
func check(cliCtx *cli.Context) error {
// Obtain reference snippets.
defs, err := parseSpecs()
if err != nil {
return err
}
// Walk the path, and process all contained Golang files.
	fileWalker := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info == nil {
			return fmt.Errorf("invalid input dir %q", path)
}
if !strings.HasSuffix(info.Name(), ".go") {
return nil
}
return inspectFile(path, defs)
}
return filepath.Walk(cliCtx.String(dirFlag.Name), fileWalker)
}
func inspectFile(path string, defs map[string][]string) error {
// Parse source files, and check the pseudo code.
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
if err != nil {
return err
}
ast.Inspect(file, func(node ast.Node) bool {
stmt, ok := node.(*ast.CommentGroup)
if !ok {
return true
}
// Ignore comment groups that do not have python pseudo-code.
chunk := stmt.Text()
if !reg1.MatchString(chunk) {
return true
}
// Trim the chunk, so that it starts from Python's "def".
loc := reg1.FindStringIndex(chunk)
chunk = chunk[loc[0]:]
// Find out Python function name.
defName, defBody := parseDefChunk(chunk)
if defName == "" {
fmt.Printf("%s: cannot parse comment pseudo code\n", fset.Position(node.Pos()))
return false
}
// Calculate differences with reference implementation.
refDefs, ok := defs[defName]
if !ok {
fmt.Printf("%s: %q is not found in spec docs\n", fset.Position(node.Pos()), defName)
return false
}
if !matchesRefImplementation(defName, refDefs, defBody, fset.Position(node.Pos())) {
fmt.Printf("%s: %q code does not match reference implementation in specs\n", fset.Position(node.Pos()), defName)
return false
}
return true
})
return nil
}
// parseSpecs parses the input spec docs into a map of function name -> array of function bodies
// (a single entity may have several definitions).
func parseSpecs() (map[string][]string, error) {
loadSpecsFile := func(sb *strings.Builder, specFilePath string) error {
chunk, err := specFS.ReadFile(specFilePath)
if err != nil {
return fmt.Errorf("cannot read specs file: %w", err)
}
_, err = sb.Write(chunk)
if err != nil {
return fmt.Errorf("cannot copy specs file: %w", err)
}
return nil
}
	// Traverse all spec files, and aggregate them into a single string.
var sb strings.Builder
for dirName, fileNames := range specDirs {
for _, fileName := range fileNames {
if err := loadSpecsFile(&sb, path.Join("data", dirName, fileName)); err != nil {
return nil, err
}
}
}
// Load file with extra definitions (this allows us to use pseudo-code that is not from specs).
if err := loadSpecsFile(&sb, path.Join("data", "extra.md")); err != nil {
return nil, err
}
// Parse docs into function name -> array of function bodies map.
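	// Dropping the opening "```python" markers first means that the subsequent split on "```" yields each
	// code snippet as its own chunk; the prose between snippets lands in chunks that parseDefChunk rejects.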
chunks := strings.Split(strings.ReplaceAll(sb.String(), "```python", ""), "```")
defs := make(map[string][]string, len(chunks))
for _, chunk := range chunks {
defName, defBody := parseDefChunk(chunk)
if defName == "" {
continue
}
defs[defName] = append(defs[defName], defBody)
}
return defs, nil
}
// parseDefChunk extracts the function name and function body from a Python "def" chunk.
func parseDefChunk(chunk string) (string, string) {
chunk = strings.TrimLeft(chunk, "\n")
if chunk == "" {
return "", ""
}
chunkLines := strings.Split(chunk, "\n")
	// Ignore all snippets that do not define functions.
	if !strings.HasPrefix(chunkLines[0], "def ") {
		return "", ""
}
defMatches := reg1.FindStringSubmatch(chunkLines[0])
if len(defMatches) < 2 {
return "", ""
}
return strings.Trim(defMatches[1], " "), chunk
}
// matchesRefImplementation compares input string to reference code snippets (there might be multiple implementations).
func matchesRefImplementation(defName string, refDefs []string, input string, pos token.Position) bool {
for _, refDef := range refDefs {
refDefLines := strings.Split(strings.TrimRight(refDef, "\n"), "\n")
inputLines := strings.Split(strings.TrimRight(input, "\n"), "\n")
		matchesPerfectly := true
		for i := 0; i < len(refDefLines); i++ {
			// Guard against out-of-range indexing: the input comment may be shorter than the reference snippet.
			if i >= len(inputLines) {
				matchesPerfectly = false
				break
			}
			a, b := strings.Trim(refDefLines[i], " "), strings.Trim(inputLines[i], " ")
			if a != b {
				matchesPerfectly = false
				break
			}
}
		// Flag potential issues when our comment block has extra lines (which may be fine, as we are not required
		// to put the spec comment as the last one in the doc block).
if checkNumRows && len(refDefLines) != len(inputLines) {
fmt.Printf("%s: %q potentially has issues (comment is longer than reference implementation)\n", pos, defName)
}
if matchesPerfectly {
return true
}
}
return false
}

tools/specs-checker/data/extra.md

@@ -0,0 +1,15 @@
```python
def Sign(SK: int, message: Bytes) -> BLSSignature
```
```python
def Verify(PK: BLSPubkey, message: Bytes, signature: BLSSignature) -> bool
```
```python
def AggregateVerify(pairs: Sequence[PK: BLSPubkey, message: Bytes], signature: BLSSignature) -> bool
```
```python
def FastAggregateVerify(PKs: Sequence[BLSPubkey], message: Bytes, signature: BLSSignature) -> bool
```
```python
def Aggregate(signatures: Sequence[BLSSignature]) -> BLSSignature
```

tools/specs-checker/data/specs/phase0/beacon-chain.md

File diff suppressed because it is too large

tools/specs-checker/data/specs/phase0/fork-choice.md

@@ -0,0 +1,259 @@
```python
def get_forkchoice_store(anchor_state: BeaconState, anchor_block: BeaconBlock) -> Store:
assert anchor_block.state_root == hash_tree_root(anchor_state)
anchor_root = hash_tree_root(anchor_block)
anchor_epoch = get_current_epoch(anchor_state)
justified_checkpoint = Checkpoint(epoch=anchor_epoch, root=anchor_root)
finalized_checkpoint = Checkpoint(epoch=anchor_epoch, root=anchor_root)
return Store(
time=uint64(anchor_state.genesis_time + SECONDS_PER_SLOT * anchor_state.slot),
genesis_time=anchor_state.genesis_time,
justified_checkpoint=justified_checkpoint,
finalized_checkpoint=finalized_checkpoint,
best_justified_checkpoint=justified_checkpoint,
blocks={anchor_root: copy(anchor_block)},
block_states={anchor_root: copy(anchor_state)},
checkpoint_states={justified_checkpoint: copy(anchor_state)},
)
```
```python
def get_slots_since_genesis(store: Store) -> int:
return (store.time - store.genesis_time) // SECONDS_PER_SLOT
```
```python
def get_current_slot(store: Store) -> Slot:
return Slot(GENESIS_SLOT + get_slots_since_genesis(store))
```
```python
def compute_slots_since_epoch_start(slot: Slot) -> int:
return slot - compute_start_slot_at_epoch(compute_epoch_at_slot(slot))
```
```python
def get_ancestor(store: Store, root: Root, slot: Slot) -> Root:
block = store.blocks[root]
if block.slot > slot:
return get_ancestor(store, block.parent_root, slot)
elif block.slot == slot:
return root
else:
# root is older than queried slot, thus a skip slot. Return most recent root prior to slot
return root
```
```python
def get_latest_attesting_balance(store: Store, root: Root) -> Gwei:
state = store.checkpoint_states[store.justified_checkpoint]
active_indices = get_active_validator_indices(state, get_current_epoch(state))
return Gwei(sum(
state.validators[i].effective_balance for i in active_indices
if (i in store.latest_messages
and get_ancestor(store, store.latest_messages[i].root, store.blocks[root].slot) == root)
))
```
```python
def filter_block_tree(store: Store, block_root: Root, blocks: Dict[Root, BeaconBlock]) -> bool:
block = store.blocks[block_root]
children = [
root for root in store.blocks.keys()
if store.blocks[root].parent_root == block_root
]
# If any children branches contain expected finalized/justified checkpoints,
# add to filtered block-tree and signal viability to parent.
if any(children):
filter_block_tree_result = [filter_block_tree(store, child, blocks) for child in children]
if any(filter_block_tree_result):
blocks[block_root] = block
return True
return False
# If leaf block, check finalized/justified checkpoints as matching latest.
head_state = store.block_states[block_root]
correct_justified = (
store.justified_checkpoint.epoch == GENESIS_EPOCH
or head_state.current_justified_checkpoint == store.justified_checkpoint
)
correct_finalized = (
store.finalized_checkpoint.epoch == GENESIS_EPOCH
or head_state.finalized_checkpoint == store.finalized_checkpoint
)
# If expected finalized/justified, add to viable block-tree and signal viability to parent.
if correct_justified and correct_finalized:
blocks[block_root] = block
return True
# Otherwise, branch not viable
return False
```
```python
def get_filtered_block_tree(store: Store) -> Dict[Root, BeaconBlock]:
"""
Retrieve a filtered block tree from ``store``, only returning branches
whose leaf state's justified/finalized info agrees with that in ``store``.
"""
base = store.justified_checkpoint.root
blocks: Dict[Root, BeaconBlock] = {}
filter_block_tree(store, base, blocks)
return blocks
```
```python
def get_head(store: Store) -> Root:
# Get filtered block tree that only includes viable branches
blocks = get_filtered_block_tree(store)
# Execute the LMD-GHOST fork choice
head = store.justified_checkpoint.root
while True:
children = [
root for root in blocks.keys()
if blocks[root].parent_root == head
]
if len(children) == 0:
return head
# Sort by latest attesting balance with ties broken lexicographically
head = max(children, key=lambda root: (get_latest_attesting_balance(store, root), root))
```
```python
def should_update_justified_checkpoint(store: Store, new_justified_checkpoint: Checkpoint) -> bool:
"""
To address the bouncing attack, only update conflicting justified
checkpoints in the fork choice if in the early slots of the epoch.
Otherwise, delay incorporation of new justified checkpoint until next epoch boundary.
See https://ethresear.ch/t/prevention-of-bouncing-attack-on-ffg/6114 for more detailed analysis and discussion.
"""
if compute_slots_since_epoch_start(get_current_slot(store)) < SAFE_SLOTS_TO_UPDATE_JUSTIFIED:
return True
justified_slot = compute_start_slot_at_epoch(store.justified_checkpoint.epoch)
if not get_ancestor(store, new_justified_checkpoint.root, justified_slot) == store.justified_checkpoint.root:
return False
return True
```
```python
def validate_on_attestation(store: Store, attestation: Attestation) -> None:
target = attestation.data.target
# Attestations must be from the current or previous epoch
current_epoch = compute_epoch_at_slot(get_current_slot(store))
# Use GENESIS_EPOCH for previous when genesis to avoid underflow
previous_epoch = current_epoch - 1 if current_epoch > GENESIS_EPOCH else GENESIS_EPOCH
# If attestation target is from a future epoch, delay consideration until the epoch arrives
assert target.epoch in [current_epoch, previous_epoch]
assert target.epoch == compute_epoch_at_slot(attestation.data.slot)
# Attestations target be for a known block. If target block is unknown, delay consideration until the block is found
assert target.root in store.blocks
# Attestations must be for a known block. If block is unknown, delay consideration until the block is found
assert attestation.data.beacon_block_root in store.blocks
# Attestations must not be for blocks in the future. If not, the attestation should not be considered
assert store.blocks[attestation.data.beacon_block_root].slot <= attestation.data.slot
# LMD vote must be consistent with FFG vote target
target_slot = compute_start_slot_at_epoch(target.epoch)
assert target.root == get_ancestor(store, attestation.data.beacon_block_root, target_slot)
# Attestations can only affect the fork choice of subsequent slots.
# Delay consideration in the fork choice until their slot is in the past.
assert get_current_slot(store) >= attestation.data.slot + 1
```
```python
def store_target_checkpoint_state(store: Store, target: Checkpoint) -> None:
# Store target checkpoint state if not yet seen
if target not in store.checkpoint_states:
base_state = copy(store.block_states[target.root])
if base_state.slot < compute_start_slot_at_epoch(target.epoch):
process_slots(base_state, compute_start_slot_at_epoch(target.epoch))
store.checkpoint_states[target] = base_state
```
```python
def update_latest_messages(store: Store, attesting_indices: Sequence[ValidatorIndex], attestation: Attestation) -> None:
target = attestation.data.target
beacon_block_root = attestation.data.beacon_block_root
for i in attesting_indices:
if i not in store.latest_messages or target.epoch > store.latest_messages[i].epoch:
store.latest_messages[i] = LatestMessage(epoch=target.epoch, root=beacon_block_root)
```
```python
def on_tick(store: Store, time: uint64) -> None:
previous_slot = get_current_slot(store)
# update store time
store.time = time
current_slot = get_current_slot(store)
# Not a new epoch, return
if not (current_slot > previous_slot and compute_slots_since_epoch_start(current_slot) == 0):
return
# Update store.justified_checkpoint if a better checkpoint is known
if store.best_justified_checkpoint.epoch > store.justified_checkpoint.epoch:
store.justified_checkpoint = store.best_justified_checkpoint
```
```python
def on_block(store: Store, signed_block: SignedBeaconBlock) -> None:
block = signed_block.message
# Parent block must be known
assert block.parent_root in store.block_states
# Make a copy of the state to avoid mutability issues
pre_state = copy(store.block_states[block.parent_root])
# Blocks cannot be in the future. If they are, their consideration must be delayed until the are in the past.
assert get_current_slot(store) >= block.slot
# Check that block is later than the finalized epoch slot (optimization to reduce calls to get_ancestor)
finalized_slot = compute_start_slot_at_epoch(store.finalized_checkpoint.epoch)
assert block.slot > finalized_slot
# Check block is a descendant of the finalized block at the checkpoint finalized slot
assert get_ancestor(store, block.parent_root, finalized_slot) == store.finalized_checkpoint.root
# Check the block is valid and compute the post-state
state = pre_state.copy()
state_transition(state, signed_block, True)
# Add new block to the store
store.blocks[hash_tree_root(block)] = block
# Add new state for this block to the store
store.block_states[hash_tree_root(block)] = state
# Update justified checkpoint
if state.current_justified_checkpoint.epoch > store.justified_checkpoint.epoch:
if state.current_justified_checkpoint.epoch > store.best_justified_checkpoint.epoch:
store.best_justified_checkpoint = state.current_justified_checkpoint
if should_update_justified_checkpoint(store, state.current_justified_checkpoint):
store.justified_checkpoint = state.current_justified_checkpoint
# Update finalized checkpoint
if state.finalized_checkpoint.epoch > store.finalized_checkpoint.epoch:
store.finalized_checkpoint = state.finalized_checkpoint
# Potentially update justified if different from store
if store.justified_checkpoint != state.current_justified_checkpoint:
# Update justified if new justified is later than store justified
if state.current_justified_checkpoint.epoch > store.justified_checkpoint.epoch:
store.justified_checkpoint = state.current_justified_checkpoint
return
# Update justified if store justified is not in chain with finalized checkpoint
finalized_slot = compute_start_slot_at_epoch(store.finalized_checkpoint.epoch)
ancestor_at_finalized_slot = get_ancestor(store, store.justified_checkpoint.root, finalized_slot)
if ancestor_at_finalized_slot != store.finalized_checkpoint.root:
store.justified_checkpoint = state.current_justified_checkpoint
```
```python
def on_attestation(store: Store, attestation: Attestation) -> None:
"""
Run ``on_attestation`` upon receiving a new ``attestation`` from either within a block or directly on the wire.
An ``attestation`` that is asserted as invalid may be valid at a later time,
consider scheduling it for later processing in such case.
"""
validate_on_attestation(store, attestation)
store_target_checkpoint_state(store, attestation.data.target)
# Get state at the `target` to fully validate attestation
target_state = store.checkpoint_states[attestation.data.target]
indexed_attestation = get_indexed_attestation(target_state, attestation)
assert is_valid_indexed_attestation(target_state, indexed_attestation)
# Update latest messages for attesting indices
update_latest_messages(store, indexed_attestation.attesting_indices, attestation)
```

tools/specs-checker/data/specs/phase0/validator.md

@@ -0,0 +1,150 @@
```python
def check_if_validator_active(state: BeaconState, validator_index: ValidatorIndex) -> bool:
validator = state.validators[validator_index]
return is_active_validator(validator, get_current_epoch(state))
```
```python
def get_committee_assignment(state: BeaconState,
epoch: Epoch,
validator_index: ValidatorIndex
) -> Optional[Tuple[Sequence[ValidatorIndex], CommitteeIndex, Slot]]:
"""
Return the committee assignment in the ``epoch`` for ``validator_index``.
``assignment`` returned is a tuple of the following form:
* ``assignment[0]`` is the list of validators in the committee
* ``assignment[1]`` is the index to which the committee is assigned
* ``assignment[2]`` is the slot at which the committee is assigned
Return None if no assignment.
"""
next_epoch = Epoch(get_current_epoch(state) + 1)
assert epoch <= next_epoch
start_slot = compute_start_slot_at_epoch(epoch)
committee_count_per_slot = get_committee_count_per_slot(state, epoch)
for slot in range(start_slot, start_slot + SLOTS_PER_EPOCH):
for index in range(committee_count_per_slot):
committee = get_beacon_committee(state, Slot(slot), CommitteeIndex(index))
if validator_index in committee:
return committee, CommitteeIndex(index), Slot(slot)
return None
```
```python
def is_proposer(state: BeaconState, validator_index: ValidatorIndex) -> bool:
return get_beacon_proposer_index(state) == validator_index
```
```python
def get_epoch_signature(state: BeaconState, block: BeaconBlock, privkey: int) -> BLSSignature:
domain = get_domain(state, DOMAIN_RANDAO, compute_epoch_at_slot(block.slot))
signing_root = compute_signing_root(compute_epoch_at_slot(block.slot), domain)
return bls.Sign(privkey, signing_root)
```
```python
def compute_time_at_slot(state: BeaconState, slot: Slot) -> uint64:
return uint64(state.genesis_time + slot * SECONDS_PER_SLOT)
```
```python
def voting_period_start_time(state: BeaconState) -> uint64:
eth1_voting_period_start_slot = Slot(state.slot - state.slot % (EPOCHS_PER_ETH1_VOTING_PERIOD * SLOTS_PER_EPOCH))
return compute_time_at_slot(state, eth1_voting_period_start_slot)
```
```python
def is_candidate_block(block: Eth1Block, period_start: uint64) -> bool:
return (
block.timestamp + SECONDS_PER_ETH1_BLOCK * ETH1_FOLLOW_DISTANCE <= period_start
and block.timestamp + SECONDS_PER_ETH1_BLOCK * ETH1_FOLLOW_DISTANCE * 2 >= period_start
)
```
```python
def get_eth1_vote(state: BeaconState, eth1_chain: Sequence[Eth1Block]) -> Eth1Data:
period_start = voting_period_start_time(state)
# `eth1_chain` abstractly represents all blocks in the eth1 chain sorted by ascending block height
votes_to_consider = [
get_eth1_data(block) for block in eth1_chain
if (
is_candidate_block(block, period_start)
# Ensure cannot move back to earlier deposit contract states
and get_eth1_data(block).deposit_count >= state.eth1_data.deposit_count
)
]
# Valid votes already cast during this period
valid_votes = [vote for vote in state.eth1_data_votes if vote in votes_to_consider]
# Default vote on latest eth1 block data in the period range unless eth1 chain is not live
# Non-substantive casting for linter
state_eth1_data: Eth1Data = state.eth1_data
default_vote = votes_to_consider[len(votes_to_consider) - 1] if any(votes_to_consider) else state_eth1_data
return max(
valid_votes,
key=lambda v: (valid_votes.count(v), -valid_votes.index(v)), # Tiebreak by smallest distance
default=default_vote
)
```
```python
def compute_new_state_root(state: BeaconState, block: BeaconBlock) -> Root:
temp_state: BeaconState = state.copy()
signed_block = SignedBeaconBlock(message=block)
state_transition(temp_state, signed_block, validate_result=False)
return hash_tree_root(temp_state)
```
```python
def get_block_signature(state: BeaconState, block: BeaconBlock, privkey: int) -> BLSSignature:
domain = get_domain(state, DOMAIN_BEACON_PROPOSER, compute_epoch_at_slot(block.slot))
signing_root = compute_signing_root(block, domain)
return bls.Sign(privkey, signing_root)
```
```python
def get_attestation_signature(state: BeaconState, attestation_data: AttestationData, privkey: int) -> BLSSignature:
domain = get_domain(state, DOMAIN_BEACON_ATTESTER, attestation_data.target.epoch)
signing_root = compute_signing_root(attestation_data, domain)
return bls.Sign(privkey, signing_root)
```
```python
def compute_subnet_for_attestation(committees_per_slot: uint64, slot: Slot, committee_index: CommitteeIndex) -> uint64:
"""
Compute the correct subnet for an attestation for Phase 0.
Note, this mimics expected future behavior where attestations will be mapped to their shard subnet.
"""
slots_since_epoch_start = uint64(slot % SLOTS_PER_EPOCH)
committees_since_epoch_start = committees_per_slot * slots_since_epoch_start
return uint64((committees_since_epoch_start + committee_index) % ATTESTATION_SUBNET_COUNT)
```
```python
def get_slot_signature(state: BeaconState, slot: Slot, privkey: int) -> BLSSignature:
domain = get_domain(state, DOMAIN_SELECTION_PROOF, compute_epoch_at_slot(slot))
signing_root = compute_signing_root(slot, domain)
return bls.Sign(privkey, signing_root)
```
```python
def is_aggregator(state: BeaconState, slot: Slot, index: CommitteeIndex, slot_signature: BLSSignature) -> bool:
committee = get_beacon_committee(state, slot, index)
modulo = max(1, len(committee) // TARGET_AGGREGATORS_PER_COMMITTEE)
return bytes_to_uint64(hash(slot_signature)[0:8]) % modulo == 0
```
```python
def get_aggregate_signature(attestations: Sequence[Attestation]) -> BLSSignature:
signatures = [attestation.signature for attestation in attestations]
return bls.Aggregate(signatures)
```
```python
def get_aggregate_and_proof(state: BeaconState,
aggregator_index: ValidatorIndex,
aggregate: Attestation,
privkey: int) -> AggregateAndProof:
return AggregateAndProof(
aggregator_index=aggregator_index,
aggregate=aggregate,
selection_proof=get_slot_signature(state, aggregate.data.slot, privkey),
)
```
```python
def get_aggregate_and_proof_signature(state: BeaconState,
aggregate_and_proof: AggregateAndProof,
privkey: int) -> BLSSignature:
aggregate = aggregate_and_proof.aggregate
domain = get_domain(state, DOMAIN_AGGREGATE_AND_PROOF, compute_epoch_at_slot(aggregate.data.slot))
signing_root = compute_signing_root(aggregate_and_proof, domain)
return bls.Sign(privkey, signing_root)
```

tools/specs-checker/data/specs/phase0/weak-subjectivity.md

@@ -0,0 +1,44 @@
```python
def compute_weak_subjectivity_period(state: BeaconState) -> uint64:
"""
Returns the weak subjectivity period for the current ``state``.
This computation takes into account the effect of:
- validator set churn (bounded by ``get_validator_churn_limit()`` per epoch), and
- validator balance top-ups (bounded by ``MAX_DEPOSITS * SLOTS_PER_EPOCH`` per epoch).
A detailed calculation can be found at:
https://github.com/runtimeverification/beacon-chain-verification/blob/master/weak-subjectivity/weak-subjectivity-analysis.pdf
"""
ws_period = MIN_VALIDATOR_WITHDRAWABILITY_DELAY
N = len(get_active_validator_indices(state, get_current_epoch(state)))
t = get_total_active_balance(state) // N // ETH_TO_GWEI
T = MAX_EFFECTIVE_BALANCE // ETH_TO_GWEI
delta = get_validator_churn_limit(state)
Delta = MAX_DEPOSITS * SLOTS_PER_EPOCH
D = SAFETY_DECAY
if T * (200 + 3 * D) < t * (200 + 12 * D):
epochs_for_validator_set_churn = (
N * (t * (200 + 12 * D) - T * (200 + 3 * D)) // (600 * delta * (2 * t + T))
)
epochs_for_balance_top_ups = (
N * (200 + 3 * D) // (600 * Delta)
)
ws_period += max(epochs_for_validator_set_churn, epochs_for_balance_top_ups)
else:
ws_period += (
3 * N * D * t // (200 * Delta * (T - t))
)
return ws_period
```
```python
def is_within_weak_subjectivity_period(store: Store, ws_state: BeaconState, ws_checkpoint: Checkpoint) -> bool:
# Clients may choose to validate the input state against the input Weak Subjectivity Checkpoint
assert ws_state.latest_block_header.state_root == ws_checkpoint.root
assert compute_epoch_at_slot(ws_state.slot) == ws_checkpoint.epoch
ws_period = compute_weak_subjectivity_period(ws_state)
ws_state_epoch = compute_epoch_at_slot(ws_state.slot)
current_epoch = compute_epoch_at_slot(get_current_slot(store))
return current_epoch <= ws_state_epoch + ws_period
```

tools/specs-checker/data/ssz/merkle-proofs.md

@@ -0,0 +1,240 @@
```python
def get_power_of_two_ceil(x: int) -> int:
"""
Get the power of 2 for given input, or the closest higher power of 2 if the input is not a power of 2.
Commonly used for "how many nodes do I need for a bottom tree layer fitting x elements?"
Example: 0->1, 1->1, 2->2, 3->4, 4->4, 5->8, 6->8, 7->8, 8->8, 9->16.
"""
if x <= 1:
return 1
elif x == 2:
return 2
else:
return 2 * get_power_of_two_ceil((x + 1) // 2)
```
```python
def get_power_of_two_floor(x: int) -> int:
"""
Get the power of 2 for given input, or the closest lower power of 2 if the input is not a power of 2.
The zero case is a placeholder and not used for math with generalized indices.
Commonly used for "what power of two makes up the root bit of the generalized index?"
Example: 0->1, 1->1, 2->2, 3->2, 4->4, 5->4, 6->4, 7->4, 8->8, 9->8
"""
if x <= 1:
return 1
if x == 2:
return x
else:
return 2 * get_power_of_two_floor(x // 2)
```
```python
def merkle_tree(leaves: Sequence[Bytes32]) -> Sequence[Bytes32]:
"""
Return an array representing the tree nodes by generalized index:
[0, 1, 2, 3, 4, 5, 6, 7], where each layer is a power of 2. The 0 index is ignored. The 1 index is the root.
The result will be twice the size as the padded bottom layer for the input leaves.
"""
bottom_length = get_power_of_two_ceil(len(leaves))
o = [Bytes32()] * bottom_length + list(leaves) + [Bytes32()] * (bottom_length - len(leaves))
for i in range(bottom_length - 1, 0, -1):
o[i] = hash(o[i * 2] + o[i * 2 + 1])
return o
```
```python
def item_length(typ: SSZType) -> int:
"""
Return the number of bytes in a basic type, or 32 (a full hash) for compound types.
"""
if issubclass(typ, BasicValue):
return typ.byte_len
else:
return 32
```
```python
def get_elem_type(typ: Union[BaseBytes, BaseList, Container],
index_or_variable_name: Union[int, SSZVariableName]) -> SSZType:
"""
Return the type of the element of an object of the given type with the given index
or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`)
"""
return typ.get_fields()[index_or_variable_name] if issubclass(typ, Container) else typ.elem_type
```
```python
def chunk_count(typ: SSZType) -> int:
"""
Return the number of hashes needed to represent the top-level elements in the given type
(eg. `x.foo` or `x[7]` but not `x[7].bar` or `x.foo.baz`). In all cases except lists/vectors
of basic types, this is simply the number of top-level elements, as each element gets one
hash. For lists/vectors of basic types, it is often fewer because multiple basic elements
can be packed into one 32-byte chunk.
"""
# typ.length describes the limit for list types, or the length for vector types.
if issubclass(typ, BasicValue):
return 1
elif issubclass(typ, Bits):
return (typ.length + 255) // 256
elif issubclass(typ, Elements):
return (typ.length * item_length(typ.elem_type) + 31) // 32
elif issubclass(typ, Container):
return len(typ.get_fields())
else:
raise Exception(f"Type not supported: {typ}")
```
```python
def get_item_position(typ: SSZType, index_or_variable_name: Union[int, SSZVariableName]) -> Tuple[int, int, int]:
"""
Return three variables:
(i) the index of the chunk in which the given element of the item is represented;
(ii) the starting byte position within the chunk;
(iii) the ending byte position within the chunk.
For example: for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16)
"""
if issubclass(typ, Elements):
index = int(index_or_variable_name)
start = index * item_length(typ.elem_type)
return start // 32, start % 32, start % 32 + item_length(typ.elem_type)
elif issubclass(typ, Container):
variable_name = index_or_variable_name
return typ.get_field_names().index(variable_name), 0, item_length(get_elem_type(typ, variable_name))
else:
raise Exception("Only lists/vectors/containers supported")
```
```python
def get_generalized_index(typ: SSZType, path: Sequence[Union[int, SSZVariableName]]) -> GeneralizedIndex:
"""
Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for
`len(x[12].bar)`) into the generalized index representing its position in the Merkle tree.
"""
root = GeneralizedIndex(1)
for p in path:
assert not issubclass(typ, BasicValue) # If we descend to a basic type, the path cannot continue further
if p == '__len__':
typ = uint64
assert issubclass(typ, (List, ByteList))
root = GeneralizedIndex(root * 2 + 1)
else:
pos, _, _ = get_item_position(typ, p)
base_index = (GeneralizedIndex(2) if issubclass(typ, (List, ByteList)) else GeneralizedIndex(1))
root = GeneralizedIndex(root * base_index * get_power_of_two_ceil(chunk_count(typ)) + pos)
typ = get_elem_type(typ, p)
return root
```
```python
def concat_generalized_indices(*indices: GeneralizedIndex) -> GeneralizedIndex:
"""
Given generalized indices i1 for A -> B, i2 for B -> C .... i_n for Y -> Z, returns
the generalized index for A -> Z.
"""
o = GeneralizedIndex(1)
for i in indices:
o = GeneralizedIndex(o * get_power_of_two_floor(i) + (i - get_power_of_two_floor(i)))
return o
```
```python
def get_generalized_index_length(index: GeneralizedIndex) -> int:
"""
Return the length of a path represented by a generalized index.
"""
return int(log2(index))
```
```python
def get_generalized_index_bit(index: GeneralizedIndex, position: int) -> bool:
"""
Return the given bit of a generalized index.
"""
return (index & (1 << position)) > 0
```
```python
def generalized_index_sibling(index: GeneralizedIndex) -> GeneralizedIndex:
return GeneralizedIndex(index ^ 1)
```
```python
def generalized_index_child(index: GeneralizedIndex, right_side: bool) -> GeneralizedIndex:
return GeneralizedIndex(index * 2 + right_side)
```
```python
def generalized_index_parent(index: GeneralizedIndex) -> GeneralizedIndex:
return GeneralizedIndex(index // 2)
```
```python
def get_branch_indices(tree_index: GeneralizedIndex) -> Sequence[GeneralizedIndex]:
"""
Get the generalized indices of the sister chunks along the path from the chunk with the
given tree index to the root.
"""
o = [generalized_index_sibling(tree_index)]
while o[-1] > 1:
o.append(generalized_index_sibling(generalized_index_parent(o[-1])))
return o[:-1]
```
```python
def get_path_indices(tree_index: GeneralizedIndex) -> Sequence[GeneralizedIndex]:
"""
Get the generalized indices of the chunks along the path from the chunk with the
given tree index to the root.
"""
o = [tree_index]
while o[-1] > 1:
o.append(generalized_index_parent(o[-1]))
return o[:-1]
```
```python
def get_helper_indices(indices: Sequence[GeneralizedIndex]) -> Sequence[GeneralizedIndex]:
"""
Get the generalized indices of all "extra" chunks in the tree needed to prove the chunks with the given
generalized indices. Note that the decreasing order is chosen deliberately to ensure equivalence to the
order of hashes in a regular single-item Merkle proof in the single-item case.
"""
all_helper_indices: Set[GeneralizedIndex] = set()
all_path_indices: Set[GeneralizedIndex] = set()
for index in indices:
all_helper_indices = all_helper_indices.union(set(get_branch_indices(index)))
all_path_indices = all_path_indices.union(set(get_path_indices(index)))
return sorted(all_helper_indices.difference(all_path_indices), reverse=True)
```
```python
def calculate_merkle_root(leaf: Bytes32, proof: Sequence[Bytes32], index: GeneralizedIndex) -> Root:
assert len(proof) == get_generalized_index_length(index)
for i, h in enumerate(proof):
if get_generalized_index_bit(index, i):
leaf = hash(h + leaf)
else:
leaf = hash(leaf + h)
return leaf
```
```python
def verify_merkle_proof(leaf: Bytes32, proof: Sequence[Bytes32], index: GeneralizedIndex, root: Root) -> bool:
return calculate_merkle_root(leaf, proof, index) == root
```
```python
def calculate_multi_merkle_root(leaves: Sequence[Bytes32],
proof: Sequence[Bytes32],
indices: Sequence[GeneralizedIndex]) -> Root:
assert len(leaves) == len(indices)
helper_indices = get_helper_indices(indices)
assert len(proof) == len(helper_indices)
objects = {
**{index: node for index, node in zip(indices, leaves)},
**{index: node for index, node in zip(helper_indices, proof)}
}
keys = sorted(objects.keys(), reverse=True)
pos = 0
while pos < len(keys):
k = keys[pos]
if k in objects and k ^ 1 in objects and k // 2 not in objects:
objects[GeneralizedIndex(k // 2)] = hash(
objects[GeneralizedIndex((k | 1) ^ 1)] +
objects[GeneralizedIndex(k | 1)]
)
keys.append(GeneralizedIndex(k // 2))
pos += 1
return objects[GeneralizedIndex(1)]
```
```python
def verify_merkle_multiproof(leaves: Sequence[Bytes32],
proof: Sequence[Bytes32],
indices: Sequence[GeneralizedIndex],
root: Root) -> bool:
return calculate_multi_merkle_root(leaves, proof, indices) == root
```

tools/specs-checker/download.go

@@ -0,0 +1,83 @@
package main
import (
_ "embed"
"fmt"
"io/ioutil"
"net/http"
"os"
"path"
"regexp"
"github.com/urfave/cli/v2"
)
const baseUrl = "https://raw.githubusercontent.com/ethereum/eth2.0-specs/dev"
// Regex to find Python's code snippets in markdown.
var reg2 = regexp.MustCompile(`(?msU)^\x60\x60\x60python\n+def\s(.*)^\x60\x60\x60`)
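// The \x60 escapes are backticks, so this matches every "```python ... ```" fenced block whose first
// line is a "def"; the U flag makes matching ungreedy, yielding one snippet per match.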
func download(cliCtx *cli.Context) error {
fmt.Print("Downloading specs:\n")
baseDir := cliCtx.String(dirFlag.Name)
for dirName, fileNames := range specDirs {
if err := prepareDir(path.Join(baseDir, dirName)); err != nil {
return err
}
for _, fileName := range fileNames {
outFilePath := path.Join(baseDir, dirName, fileName)
			specDocUrl := fmt.Sprintf("%s/%s/%s", baseUrl, dirName, fileName)
fmt.Printf("- %s\n", specDocUrl)
if err := getAndSaveFile(specDocUrl, outFilePath); err != nil {
return err
}
}
}
return nil
}
func getAndSaveFile(specDocUrl, outFilePath string) error {
// Create output file.
f, err := os.Create(outFilePath)
if err != nil {
return fmt.Errorf("cannot create output file: %w", err)
}
defer func() {
if err := f.Close(); err != nil {
fmt.Printf("cannot close output file: %v", err)
}
}()
// Download spec doc.
resp, err := http.Get(specDocUrl)
if err != nil {
return err
}
defer func() {
if err := resp.Body.Close(); err != nil {
fmt.Printf("cannot close spec doc file: %v", err)
}
}()
// Transform and save spec docs.
specDoc, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
specDocString := string(specDoc)
for _, snippet := range reg2.FindAllString(specDocString, -1) {
if _, err = f.WriteString(snippet + "\n"); err != nil {
return err
}
}
return nil
}
func prepareDir(dirPath string) error {
	return os.MkdirAll(dirPath, os.ModePerm)
}

tools/specs-checker/main.go

@@ -0,0 +1,64 @@
package main
import (
"embed"
"log"
"os"
"github.com/urfave/cli/v2"
)
var (
dirFlag = &cli.StringFlag{
Name: "dir",
Value: "",
Usage: "Target directory",
Required: true,
}
)
//go:embed data
var specFS embed.FS
var specDirs = map[string][]string{
"specs/phase0": {
"beacon-chain.md",
"fork-choice.md",
"validator.md",
"weak-subjectivity.md",
},
"ssz": {
"merkle-proofs.md",
},
}
func main() {
app := &cli.App{
Name: "Specs checker utility",
Description: "Checks that specs pseudo code used in comments is up to date",
Usage: "helps keeping specs pseudo code up to date!",
Commands: []*cli.Command{
{
Name: "check",
Usage: "Checks that all doc strings",
Flags: []cli.Flag{
dirFlag,
},
Action: check,
},
{
Name: "download",
Usage: "Downloads the latest specs docs",
Action: download,
Flags: []cli.Flag{
dirFlag,
},
},
},
}
err := app.Run(os.Args)
if err != nil {
log.Fatal(err)
}
}