SSZ-QL: update "path parsing" data types (#15935)

* updated path processing data types, refactored ParsePath and fixed tests

* updated generalized index accordingly, changed input parameter path type from []PathElement to Path

* updated query.go accordingly, changed input parameter path type from []PathElement to Path

* added descriptive changelog

* Update encoding/ssz/query/path.go

Co-authored-by: Jun Song <87601811+syjn99@users.noreply.github.com>

* Added documentation for Path struct and renamed  to  for clarity

* Update encoding/ssz/query/path.go

Co-authored-by: Radosław Kapka <radoslaw.kapka@gmail.com>

* updated changelog to its correct type: Changed

* updated outdated comment in generalized_index.go and removed test in generalized_index_test.go as this one belongs in path_test.go

* Added validateRawPath with strict raw-path validation only - no raw-path fixing is added. Added test suite covering

* added extra tests for wrongly formatted paths

---------

Co-authored-by: Jun Song <87601811+syjn99@users.noreply.github.com>
Co-authored-by: Radosław Kapka <radoslaw.kapka@gmail.com>
Co-authored-by: Radosław Kapka <rkapka@wp.pl>
This commit is contained in:
fernantho
2025-10-31 18:37:59 +01:00
committed by GitHub
parent 577899bfec
commit d3bd0eaa30
6 changed files with 327 additions and 169 deletions

View File

@@ -14,13 +14,13 @@ const listBaseIndex = 2
// 1. The sszInfo of the root object, to be able to navigate the SSZ structure
// 2. The path to the field (e.g., "field_a.field_b[3].field_c")
// It walks the path step by step, updating the generalized index at each step.
func GetGeneralizedIndexFromPath(info *SszInfo, path []PathElement) (uint64, error) {
func GetGeneralizedIndexFromPath(info *SszInfo, path Path) (uint64, error) {
if info == nil {
return 0, errors.New("SszInfo is nil")
}
// If path is empty, no generalized index can be computed.
if len(path) == 0 {
if len(path.Elements) == 0 {
return 0, errors.New("cannot compute generalized index for an empty path")
}
@@ -28,7 +28,7 @@ func GetGeneralizedIndexFromPath(info *SszInfo, path []PathElement) (uint64, err
currentIndex := uint64(1)
currentInfo := info
for _, pathElement := range path {
for index, pathElement := range path.Elements {
element := pathElement
// Check that we are in a container to access fields
@@ -52,8 +52,8 @@ func GetGeneralizedIndexFromPath(info *SszInfo, path []PathElement) (uint64, err
currentIndex = currentIndex*nextPowerOfTwo(chunkCount) + fieldPos
currentInfo = fieldSsz
// Check if a path element is a length field
if element.Length {
// Check for length access: element is the last in the path and requests length
if path.Length && index == len(path.Elements)-1 {
currentInfo, currentIndex, err = calculateLengthGeneralizedIndex(fieldSsz, element, currentIndex)
if err != nil {
return 0, fmt.Errorf("length calculation error: %w", err)

View File

@@ -65,12 +65,6 @@ func TestGetIndicesFromPath_FixedNestedContainer(t *testing.T) {
expectedIndex: 3,
expectError: false,
},
{
name: "Empty path error",
path: "",
expectError: true,
errorMessage: "empty path",
},
}
for _, tc := range testCases {
@@ -217,8 +211,8 @@ func TestGetIndicesFromPath_VariableTestContainer(t *testing.T) {
expectError: false,
},
{
name: "variable_container_list[0].inner_1.len(nested_list_field[3])",
path: "variable_container_list[0].inner_1.len(nested_list_field[3])",
name: "len(variable_container_list[0].inner_1.nested_list_field[3])",
path: "len(variable_container_list[0].inner_1.nested_list_field[3])",
expectError: true,
errorMessage: "length calculation error: len() is not supported for multi-dimensional arrays",
},

View File

@@ -10,51 +10,77 @@ import (
// PathElement represents a single element in a path.
type PathElement struct {
Length bool
Name string
Name string
// [Optional] Index for List/Vector elements
Index *uint64
}
var arrayIndexRegex = regexp.MustCompile(`\[\s*([^\]]+)\s*\]`)
// Path represents the entire path structure for SSZ-QL queries. It consists of multiple PathElements
// and a flag indicating if the path is querying for length.
//
// ParsePath produces a Path from a raw string: Length is set when the whole raw path is wrapped
// in a len(...) call (e.g. "len(field_a.field_b)"), and Elements holds one entry per dot-separated
// segment, in the order the segments appear in the raw path.
type Path struct {
// If true, the path is querying for the length of the final element in Elements field
// rather than for the element itself.
Length bool
// Sequence of path elements representing the navigation through the SSZ structure
Elements []PathElement
}
// Matches an array index expression made only of decimal digits, like [123], and captures the digits without the brackets. Non-numeric or whitespace-padded indices such as [foo] or [ 1 ] do not match.
var arrayIndexRegex = regexp.MustCompile(`\[(\d+)\]`)
// Matches an entire string that is a len(<expr>) call (whitespace flexible), capturing the inner expression and disallowing any trailing characters.
var lengthRegex = regexp.MustCompile(`^\s*len\s*\(\s*([^)]+?)\s*\)\s*$`)
// Valid path characters: letters, digits, dot, underscore, square brackets and parentheses only.
// Any other character (including whitespace) will render the path invalid.
var validPathChars = regexp.MustCompile(`^[A-Za-z0-9._\[\]\(\)]*$`)
// Invalid patterns: a closing bracket followed by anything other than '.', '[' or ')', or a closing parenthesis followed by any character.
var invalidBracketPattern = regexp.MustCompile(`\][^.\[\)]|\).`)
// ParsePath parses a raw path string into a Path (its PathElements plus a flag telling whether the path is a length query).
// note: field names are stored in snake case format. rawPath has to be provided in snake case.
// 1. Supports dot notation for field access (e.g., "field1.field2").
// 2. Supports array indexing using square brackets (e.g., "array_field[0]").
// 3. Supports length access using len() notation (e.g., "len(array_field)").
// 4. Handles leading dots and validates path format.
func ParsePath(rawPath string) ([]PathElement, error) {
rawElements := strings.Split(rawPath, ".")
func ParsePath(rawPath string) (Path, error) {
if err := validateRawPath(rawPath); err != nil {
return Path{}, err
}
var rawElements []string
var processedPath Path
matches := lengthRegex.FindStringSubmatch(rawPath)
// FindStringSubmatch matches a whole string like "len(field_name)" and its inner expression.
// For a path element to be a length query, len(matches) should be 2:
// 1. Full match: "len(field_name)"
// 2. Inner expression: "field_name"
if len(matches) == 2 {
processedPath.Length = true
// If we have found a len() expression, we only want to parse its inner expression.
rawElements = strings.Split(matches[1], ".")
} else {
// Normal path parsing
rawElements = strings.Split(rawPath, ".")
}
if rawElements[0] == "" {
// Remove leading dot if present
rawElements = rawElements[1:]
}
var path []PathElement
var pathElements []PathElement
for _, elem := range rawElements {
if elem == "" {
return nil, errors.New("invalid path: consecutive dots or trailing dot")
return Path{}, errors.New("invalid path: consecutive dots or trailing dot")
}
// Processing element string
processingField := elem
var pathElement PathElement
matches := lengthRegex.FindStringSubmatch(processingField)
// FindStringSubmatch matches a whole string like "len(field_name)" and its inner expression.
// For a path element to be a length query, len(matches) should be 2:
// 1. Full match: "len(field_name)"
// 2. Inner expression: "field_name"
if len(matches) == 2 {
pathElement.Length = true
// Extract the inner expression between len( and ) and continue parsing on that
processingField = matches[1]
}
// Default name is the full working string (may be updated below if it contains indices)
pathElement.Name = processingField
@@ -63,22 +89,71 @@ func ParsePath(rawPath string) ([]PathElement, error) {
pathElement.Name = extractFieldName(processingField)
indices, err := extractArrayIndices(processingField)
if err != nil {
return nil, err
return Path{}, err
}
// Although extractArrayIndices supports multiple indices,
// only a single index is supported per PathElement, e.g., "transactions[0]" is valid
// while "transactions[0][0]" is rejected explicitly.
if len(indices) != 1 {
return nil, fmt.Errorf("multiple indices not supported in token %s", processingField)
return Path{}, fmt.Errorf("multiple indices not supported in token %s", processingField)
}
pathElement.Index = &indices[0]
}
path = append(path, pathElement)
pathElements = append(pathElements, pathElement)
}
return path, nil
processedPath.Elements = pathElements
return processedPath, nil
}
// validateRawPath performs initial validation of the raw path string. It only
// validates; it never rewrites or repairs the path. It returns nil when the
// path passes every check and a descriptive error otherwise.
//
// Checks, in order:
//  1. Rejects invalid characters (only letters, digits, '_', '.', '[]' and '()' are allowed).
//  2. Validates that the numbers of '(' and ')' match.
//  3. Validates that the numbers of '[' and ']' match and that every bracket
//     belongs to a well-formed numeric index such as "[42]".
//  4. Ensures a len( call only appears at the very start of the path.
//  5. Rejects empty len() calls.
//  6. Rejects malformed sequences around closing brackets/parentheses such as
//     "arr[0]foo" or "len(data).foo".
//  7. Rejects parentheses that are not part of a single len(...) call wrapping
//     the whole path, e.g. "(data)" or "test()".
func validateRawPath(rawPath string) error {
	// 1. Reject any path containing invalid characters (this includes spaces).
	if !validPathChars.MatchString(rawPath) {
		return errors.New("invalid character in path: only letters, digits, '_', '.', '[]' and '()' are allowed")
	}

	// 2. Basic validation for balanced parentheses. This is a count check only:
	// wrongly ordered input like "test))((" has matching counts and is rejected
	// by a later check instead.
	if strings.Count(rawPath, "(") != strings.Count(rawPath, ")") {
		return fmt.Errorf("unmatched parentheses in path: %s", rawPath)
	}

	// 3. Basic validation for balanced brackets:
	// wrongly formatted paths like "array][0][" are rejected by checking bracket counts and format.
	openBracketsCount := strings.Count(rawPath, "[")
	if openBracketsCount != strings.Count(rawPath, "]") {
		return fmt.Errorf("unmatched brackets in path: %s", rawPath)
	}
	// Every '[' must open a well-formed "[<digits>]" index. Since the bracket
	// counts are already known to be equal, comparing against one is enough.
	if len(arrayIndexRegex.FindAllString(rawPath, -1)) != openBracketsCount {
		return fmt.Errorf("invalid bracket format in path: %s", rawPath)
	}

	// 4. Reject len() calls not at the start of the path
	if strings.Index(rawPath, "len(") > 0 {
		return fmt.Errorf("len() call must be at the start of the path: %s", rawPath)
	}

	// 5. Reject empty len() calls
	if strings.Contains(rawPath, "len()") {
		return fmt.Errorf("len() call must not be empty: %s", rawPath)
	}

	// 6. Reject invalid patterns like "][a" or "][_" which indicate malformed paths
	if invalidBracketPattern.MatchString(rawPath) {
		return fmt.Errorf("invalid path format near brackets in path: %s", rawPath)
	}

	// 7. Parentheses are only meaningful as one len(...) call that wraps the
	// entire path. Without this check, inputs such as "(data)" or "test()" pass
	// all checks above and the parentheses end up inside a field name.
	if strings.Contains(rawPath, "(") {
		wrapsWholePath := strings.HasPrefix(rawPath, "len(") &&
			strings.HasSuffix(rawPath, ")") &&
			strings.Count(rawPath, "(") == 1
		if !wrapsWholePath {
			return fmt.Errorf("parentheses are only allowed in a len() call wrapping the whole path: %s", rawPath)
		}
	}

	return nil
}
// extractFieldName extracts the field name from a path element name (removes array indices)

View File

@@ -14,199 +14,285 @@ func TestParsePath(t *testing.T) {
tests := []struct {
name string
path string
expected []query.PathElement
expected query.Path
wantErr bool
}{
{
name: "simple path",
path: "data",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "data"},
},
},
wantErr: false,
},
{
name: "simple path beginning with dot",
path: ".data",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "data"},
},
},
wantErr: false,
},
{
name: "simple path trailing dot",
path: "data.",
wantErr: true,
},
{
name: "simple path surrounded by dot",
path: ".data.",
wantErr: true,
},
{
name: "simple path beginning with two dots",
path: "..data",
wantErr: true,
},
{
name: "simple nested path",
path: "data.target.root",
expected: []query.PathElement{
{Name: "data"},
{Name: "target"},
{Name: "root"},
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "data"},
{Name: "target"},
{Name: "root"},
},
},
wantErr: false,
},
{
name: "simple nested path with leading dot",
path: ".data.target.root",
expected: []query.PathElement{
{Name: "data"},
{Name: "target"},
{Name: "root"},
name: "len with top-level identifier",
path: "len(data)",
expected: query.Path{
Length: true,
Elements: []query.PathElement{
{Name: "data"},
},
},
wantErr: false,
},
{
name: "simple length path with length field",
path: "data.target.len(root)",
expected: []query.PathElement{
{Name: "data"},
{Name: "target"},
{Name: "root", Length: true},
name: "len with top-level identifier and leading dot",
path: "len(.data)",
expected: query.Path{
Length: true,
Elements: []query.PathElement{
{Name: "data"},
},
},
wantErr: false,
},
{
name: "len with top-level identifier",
path: "len(data)",
expected: []query.PathElement{{Name: "data", Length: true}},
wantErr: false,
name: "len with top-level identifier and trailing dot",
path: "len(data.)",
wantErr: true,
},
{
name: "length with messy whitespace",
path: "data.target. \tlen ( root ) ",
expected: []query.PathElement{
{Name: "data"},
{Name: "target"},
{Name: "root", Length: true},
name: "len with top-level identifier beginning dot",
path: ".len(data)",
wantErr: true,
},
{
name: "len with dotted path inside",
path: "len(data.target.root)",
expected: query.Path{
Length: true,
Elements: []query.PathElement{
{Name: "data"},
{Name: "target"},
{Name: "root"},
},
},
wantErr: false,
},
{
name: "len with numeric index inside argument",
path: "data.len(a[10])",
expected: []query.PathElement{
{Name: "data"},
{Name: "a", Length: true, Index: u64(10)},
name: "simple length path with non-outer length field",
path: "data.target.len(root)",
wantErr: true,
},
{
name: "simple path with `len` used as a field name",
path: "data.len",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "data"},
{Name: "len"},
},
},
wantErr: false,
},
{
name: "array index with spaces",
path: "arr[ 42 ]",
expected: []query.PathElement{{Name: "arr", Index: u64(42)}},
wantErr: false,
},
{
name: "array leading zeros",
path: "arr[001]",
expected: []query.PathElement{{Name: "arr", Index: u64(1)}},
wantErr: false,
},
{
name: "array max uint64",
path: "arr[18446744073709551615]",
expected: []query.PathElement{{Name: "arr", Index: u64(18446744073709551615)}},
wantErr: false,
},
{
name: "len with dotted path inside - no input validation - reverts at a later stage",
path: "len(data.target.root)",
expected: []query.PathElement{{Name: "len(data", Length: false}, {Name: "target", Length: false}, {Name: "root)", Length: false}},
wantErr: false,
},
{
name: "len with dotted path then more - no input validation - reverts at a later stage",
path: "len(data.target.root).foo",
expected: []query.PathElement{{Name: "len(data", Length: false}, {Name: "target", Length: false}, {Name: "root)", Length: false}, {Name: "foo", Length: false}},
wantErr: false,
},
{
name: "len without closing paren - no input validation - reverts at a later stage",
path: "len(root",
expected: []query.PathElement{{Name: "len(root"}},
wantErr: false,
},
{
name: "len with extra closing paren - no input validation - reverts at a later stage",
path: "len(root))",
expected: []query.PathElement{{Name: "len(root))"}},
wantErr: false,
},
{
name: "empty len argument - no input validation - reverts at a later stage",
path: "len()",
expected: []query.PathElement{{Name: "len()"}},
wantErr: false,
},
{
name: "len with comma-separated args - no input validation - reverts at a later stage",
path: "len(a,b)",
expected: []query.PathElement{{Name: "a,b", Length: true}},
wantErr: false,
},
{
name: "len call followed by index (outer) - no input validation - reverts at a later stage",
path: "data.len(root)[0]",
expected: []query.PathElement{
{Name: "data"},
{Name: "len(root)", Index: u64(0)},
name: "simple path with `len` used as a field name + trailing field",
path: "data.len.value",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "data"},
{Name: "len"},
{Name: "value"},
},
},
wantErr: false,
},
{
name: "cannot provide consecutive dots in raw path",
path: "data..target.root",
name: "simple path with `len`",
path: "len.len",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "len"},
{Name: "len"},
},
},
wantErr: false,
},
{
name: "simple length path with length field",
path: "len.len(root)",
wantErr: true,
},
{
name: "cannot provide a negative index in array path",
path: ".data.target.root[-1]",
name: "empty length field",
path: "len()",
wantErr: true,
},
{
name: "invalid index in array path",
path: ".data.target.root[a]",
name: "length field not terminal",
path: "len(data).foo",
wantErr: true,
},
{
name: "multidimensional array index in path",
path: ".data.target.root[0][1]",
name: "length field with missing closing paren",
path: "len(data",
wantErr: true,
},
{
name: "leading double dot",
path: "..data",
expected: nil,
wantErr: true,
},
{
name: "trailing dot",
path: "data.target.",
expected: nil,
wantErr: true,
},
{
name: "len with inner bracket non-numeric index",
path: "data.len(a[b])",
name: "length field with two closing paren",
path: "len(data))",
wantErr: true,
},
{
name: "array empty index",
path: "arr[]",
name: "len with comma-separated args",
path: "len(a,b)",
wantErr: true,
},
{
name: "array hex index",
path: "arr[0x10]",
name: "array index path",
path: "arr[42]",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "arr", Index: u64(42)},
},
},
wantErr: false,
},
{
name: "array index path with max uint64",
path: "arr[18446744073709551615]",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "arr", Index: u64(18446744073709551615)},
},
},
wantErr: false,
},
{
name: "array element in wrong nested path",
path: "arr[42]foo",
wantErr: true,
},
{
name: "array missing closing bracket",
path: "arr[12",
name: "array index in nested path",
path: "arr[42].foo",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "arr", Index: u64(42)},
{Name: "foo"},
},
},
wantErr: false,
},
{
name: "array index in deeper nested path",
path: "arr[42].foo.bar[10]",
expected: query.Path{
Length: false,
Elements: []query.PathElement{
{Name: "arr", Index: u64(42)},
{Name: "foo"},
{Name: "bar", Index: u64(10)},
},
},
wantErr: false,
},
{
name: "length of array element",
path: "len(arr[42])",
expected: query.Path{
Length: true,
Elements: []query.PathElement{
{Name: "arr", Index: u64(42)},
},
},
wantErr: false,
},
{
name: "length of array + trailing item",
path: "len(arr)[0]",
wantErr: true,
},
{
name: "array plus sign index",
path: "arr[+3]",
name: "length of nested path within array element",
path: "len(arr[42].foo)",
expected: query.Path{
Length: true,
Elements: []query.PathElement{
{Name: "arr", Index: u64(42)},
{Name: "foo"},
},
},
wantErr: false,
},
{
name: "empty spaces in path",
path: "data . target",
wantErr: true,
},
{
name: "array unicode digits",
path: "arr[]",
name: "leading dot + empty spaces",
path: ". data",
wantErr: true,
},
{
name: "array overflow uint64",
path: "arr[18446744073709551616]",
name: "length with leading dot + empty spaces",
path: "len(. data)",
wantErr: true,
},
{
name: "array index then suffix",
path: "field[1]suffix",
expected: []query.PathElement{{Name: "field", Index: u64(1)}},
wantErr: false,
name: "Empty path error",
path: "",
expected: query.Path{},
},
{
name: "length with leading dot + empty spaces",
path: "test))((",
wantErr: true,
},
{
name: "length with leading dot + empty spaces",
path: "array][0][",
wantErr: true,
},
}
@@ -220,7 +306,7 @@ func TestParsePath(t *testing.T) {
}
require.NoError(t, err)
require.Equal(t, len(tt.expected), len(parsedPath), "Expected %d path elements, got %d", len(tt.expected), len(parsedPath))
require.Equal(t, len(tt.expected.Elements), len(parsedPath.Elements), "Expected %d path elements, got %d", len(tt.expected.Elements), len(parsedPath.Elements))
require.DeepEqual(t, tt.expected, parsedPath, "Parsed path does not match expected path")
})
}

View File

@@ -7,19 +7,19 @@ import (
// CalculateOffsetAndLength calculates the offset and length of a given path within the SSZ object.
// By walking the given path, it accumulates the offsets based on SszInfo.
func CalculateOffsetAndLength(sszInfo *SszInfo, path []PathElement) (*SszInfo, uint64, uint64, error) {
func CalculateOffsetAndLength(sszInfo *SszInfo, path Path) (*SszInfo, uint64, uint64, error) {
if sszInfo == nil {
return nil, 0, 0, errors.New("sszInfo is nil")
}
if len(path) == 0 {
if len(path.Elements) == 0 {
return nil, 0, 0, errors.New("path is empty")
}
walk := sszInfo
offset := uint64(0)
for pathIndex, elem := range path {
for pathIndex, elem := range path.Elements {
containerInfo, err := walk.ContainerInfo()
if err != nil {
return nil, 0, 0, fmt.Errorf("could not get field infos: %w", err)
@@ -56,7 +56,7 @@ func CalculateOffsetAndLength(sszInfo *SszInfo, path []PathElement) (*SszInfo, u
// to the next field's sszInfo, which would have the correct size information.
// However, if this is the last element in the path, we need to ensure we return the correct size
// for the indexed element. Hence, we return the size from elementSizes.
if pathIndex == len(path)-1 {
if pathIndex == len(path.Elements)-1 {
return walk, offset, listInfo.elementSizes[index], nil
}
} else {