Files
prysm/encoding/ssz/query/path.go
fernantho 10a2f0687b SSZ-QL: calculate generalized indices for elements (#15873)
* added tests for calculating generalized indices

* added first version of GI calculation walking the specified path with no recursion. Extended test coverage for bitlist and bitvectors.
vectors need more testing

* refactored code. Detached PathElement processing, currently done at the beginning. Swap to regex to gain flexibility.

* added an updateRoot function with the GI formula. more refactoring

* added changelog

* replaced TODO tag

* updated some comments

* simplified code - removed duplicated code in processingLengthField function

* run gazelle

* merging all input path processing into path.go

* reviewed Jun's feedback

* removed unnecessary idx pointer var + fixed error with length data type (uint64 instead of uint8)

* refactored path.go after merging path elements from generalized_indices.go

* re-computed GIs for tests as VariableTestContainer added a new field.

* added minor comment - rawPath MUST be snake case

removed extractFieldName func.

* fixed vector GI calculation - updated tests GIs

* removed updateRoot function in favor of inline code

* path input data enforced to be snake case

* added sanity checks for accessing outbound element indices - checked against vector.length/list.limit

* fixed issues triggered after merging develop

* Removed redundant comment

Co-authored-by: Jun Song <87601811+syjn99@users.noreply.github.com>

* removed unreachable condition as `strings.Split` always returns a slice with length >= 1

If s does not contain sep and sep is not empty, Split returns a slice of
length 1 whose only element is s.

* added tests to cover edge cases + cleaned code (toLower is no longer needed in extractFieldName function)

* added Jun's feedback + more testing

* postponed snake case conversion to do it on a per-element-basis. Added more testing focused mainly in snake case conversion

* addressed several of Jun's comments.

* added sanity check to prevent length of a multi-dimensional array. added more tests with extended paths

* Update encoding/ssz/query/generalized_index.go

Co-authored-by: Radosław Kapka <radoslaw.kapka@gmail.com>

* Update encoding/ssz/query/generalized_index.go

Co-authored-by: Radosław Kapka <radoslaw.kapka@gmail.com>

* Update encoding/ssz/query/generalized_index.go

Co-authored-by: Radosław Kapka <radoslaw.kapka@gmail.com>

* placed constant bitsPerChunk in the right place. Exported BitsPerChunk and BytesPerChunk and updated code that use them

* added helpers for computing GI of each data type

* changed %q in favor of %s

* Update encoding/ssz/query/path.go

Co-authored-by: Jun Song <87601811+syjn99@users.noreply.github.com>

* removed the least restrictive condition isBasicType

* replaced length of containerInfo.order for containerInfo.fields for clarity

* removed outdated comment

* removed toSnakeCase conversion.

* moved isBasicType func to its natural place, SSZType

* cosmetic refactor

- renamed itemLengthFromInfo to itemLength (same name is in spec).
- arranged all SSZ helpers.

* cleaned tests

* renamed "root" to "index"

* removed unnecessary check for negative integers. Replaced %q for %s.

* refactored regex variables and prevented reassignment

* added length regex explanation

* added more testing for stressing regex for path processing

* renamed currentIndex to parentIndex for clarity and documented the returns from calculate<Type>GeneralizedIndex functions

* Update encoding/ssz/query/generalized_index.go

Co-authored-by: Radosław Kapka <radoslaw.kapka@gmail.com>

* run gazelle

* fixed never asserted error. Updated error message

---------

Co-authored-by: Jun Song <87601811+syjn99@users.noreply.github.com>
Co-authored-by: Radosław Kapka <radoslaw.kapka@gmail.com>
Co-authored-by: Radosław Kapka <rkapka@wp.pl>
2025-10-27 23:27:34 +00:00

114 lines
3.6 KiB
Go

package query
import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
// PathElement represents a single element in a path.
//
// ParsePath produces one PathElement for every dot-separated token of the raw path.
type PathElement struct {
	// Length is true when the token was written as len(...), marking the element
	// as a length query.
	Length bool
	// Name is the field name with any len(...) wrapper and "[index]" suffix removed.
	Name string
	// [Optional] Index for List/Vector elements.
	// It is nil when the token carried no bracketed index.
	Index *uint64
}
var (
	// arrayIndexRegex matches a single bracketed group such as "[0]" or "[ 42 ]".
	// The capture group holds the text between the brackets; it needs at least one
	// character other than "]" and may still carry trailing whitespace, so callers
	// are expected to trim it.
	arrayIndexRegex = regexp.MustCompile(`\[\s*([^\]]+)\s*\]`)

	// lengthRegex matches a whole token of the form len(<expr>). Whitespace is
	// allowed before and after "len", around the parentheses and around <expr>.
	// The capture group holds <expr>, which must be non-empty and free of ")".
	lengthRegex = regexp.MustCompile(`^\s*len\s*\(\s*([^)]+?)\s*\)\s*$`)
)
// ParsePath converts a raw path string into its sequence of PathElements.
//
// rawPath is expected to use snake case field names; names are stored exactly as
// written, no case conversion happens here.
//
// Accepted syntax:
//   - dot-separated field access, e.g. "field1.field2";
//   - one bracketed index per token, e.g. "array_field[0]";
//   - a length query per token, e.g. "len(array_field)";
//   - a single optional leading dot, e.g. ".field1".
//
// An empty token (consecutive dots or a trailing dot) is an error, as is an index
// that cannot be parsed or a token carrying more than one index. An empty rawPath
// yields a nil slice and a nil error.
func ParsePath(rawPath string) ([]PathElement, error) {
	tokens := strings.Split(rawPath, ".")
	// strings.Split always returns at least one token, so tokens[0] is safe.
	// An empty first token means the path began with a dot (or was empty): drop it.
	if tokens[0] == "" {
		tokens = tokens[1:]
	}

	var parsed []PathElement
	for _, token := range tokens {
		if token == "" {
			return nil, errors.New("invalid path: consecutive dots or trailing dot")
		}

		// Start from the token itself; the name is refined below when the token
		// turns out to be a len(...) query and/or carries an index.
		element := PathElement{Name: token}

		// A length query yields exactly two submatches: the whole token and the
		// expression inside len( ... ). Parsing continues on the inner expression.
		if m := lengthRegex.FindStringSubmatch(token); len(m) == 2 {
			element.Length = true
			element.Name = m[1]
		}

		if target := element.Name; strings.Contains(target, "[") {
			indices, err := extractArrayIndices(target)
			if err != nil {
				return nil, err
			}
			// extractArrayIndices can return several indices, but a PathElement
			// holds only one: "transactions[0]" is fine, "transactions[0][0]" is not.
			if len(indices) != 1 {
				return nil, fmt.Errorf("multiple indices not supported in token %s", target)
			}
			element.Name = extractFieldName(target)
			element.Index = &indices[0]
		}

		parsed = append(parsed, element)
	}

	return parsed, nil
}
// extractFieldName returns the part of a path token that precedes its first "[",
// i.e. the field name without any array indices.
// For example "field_name[5]" yields "field_name"; a token without "[" is returned unchanged.
func extractFieldName(name string) string {
	bracket := strings.IndexByte(name, '[')
	if bracket < 0 {
		return name
	}
	return name[:bracket]
}
// extractArrayIndices parses the bracketed index suffix of a path token and returns
// the indices in order, e.g. "array[0][1]" -> []uint64{0, 1}.
//
// The bracket groups must begin at the first "[" of name, follow one another with
// nothing in between, and run to the end of name (trailing whitespace is tolerated).
// Tokens such as "field[0]junk", "field[0][" or "field[]x[1]" are therefore rejected
// instead of having the stray characters silently dropped.
//
// An error is returned when no bracket group is found, when the groups do not form
// a contiguous suffix of name, or when an index is not a base-10 value that fits in
// a uint64 (strconv.ParseUint rejects signs, so negative indices fail here too).
func extractArrayIndices(name string) ([]uint64, error) {
	// Each entry is [matchStart, matchEnd, groupStart, groupEnd] for one "[...]" group.
	locs := arrayIndexRegex.FindAllStringSubmatchIndex(name, -1)
	if len(locs) == 0 {
		return nil, errors.New("no array indices found")
	}

	indices := make([]uint64, 0, len(locs))
	// next is the offset at which the following bracket group has to start. The
	// first group must sit on the first "[", every later one right after its
	// predecessor; anything else means characters were skipped by the regex scan.
	next := strings.Index(name, "[")
	for _, loc := range locs {
		if loc[0] != next {
			return nil, fmt.Errorf("invalid array index syntax in %s", name)
		}
		// Parse as unsigned so that negative values are rejected explicitly.
		raw := strings.TrimSpace(name[loc[2]:loc[3]])
		idx, err := strconv.ParseUint(raw, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("invalid array index: %w", err)
		}
		indices = append(indices, idx)
		next = loc[1]
	}

	// Nothing but whitespace may follow the last closing bracket.
	if strings.TrimSpace(name[next:]) != "" {
		return nil, fmt.Errorf("invalid array index syntax in %s", name)
	}
	return indices, nil
}