From d3bd0eaa30bbd06a46bb8356c45cd63b2c65da07 Mon Sep 17 00:00:00 2001 From: fernantho Date: Fri, 31 Oct 2025 18:37:59 +0100 Subject: [PATCH] SSZ-QL: update "path parsing" data types (#15935) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * updated path processing data types, refactored ParsePath and fixed tests * updated generalized index accordingly, changed input parameter path type from []PathElement to Path * updated query.go accordingly, changed input parameter path type from []PathElement to Path * added descriptive changelog * Update encoding/ssz/query/path.go Co-authored-by: Jun Song <87601811+syjn99@users.noreply.github.com> * Added documentation for Path struct and renamed to for clarity * Update encoding/ssz/query/path.go Co-authored-by: Radosław Kapka * updated changelog to its correct type: Changed * updated outdated comment in generalized_index.go and removed test in generalized_index_test.go as this one belongs in path_test.go * Added validateRawPath with strict raw-path validation only - no raw-path fixing is added. 
Added test suite covering * added extra tests for wrongly formatted paths --------- Co-authored-by: Jun Song <87601811+syjn99@users.noreply.github.com> Co-authored-by: Radosław Kapka Co-authored-by: Radosław Kapka --- ...fernantho_ssz-ql-update-path-processing.md | 3 + encoding/ssz/query/generalized_index.go | 10 +- encoding/ssz/query/generalized_index_test.go | 10 +- encoding/ssz/query/path.go | 119 ++++-- encoding/ssz/query/path_test.go | 346 +++++++++++------- encoding/ssz/query/query.go | 8 +- 6 files changed, 327 insertions(+), 169 deletions(-) create mode 100644 changelog/fernantho_ssz-ql-update-path-processing.md diff --git a/changelog/fernantho_ssz-ql-update-path-processing.md b/changelog/fernantho_ssz-ql-update-path-processing.md new file mode 100644 index 0000000000..de626041a8 --- /dev/null +++ b/changelog/fernantho_ssz-ql-update-path-processing.md @@ -0,0 +1,3 @@ +## Changed +- Introduced Path type for SSZ-QL queries and updated PathElement (removed Length field, kept Index) enforcing that len queries are terminal (at most one per path). +- Changed length query syntax from `block.payload.len(transactions)` to `len(block.payload.transactions)` diff --git a/encoding/ssz/query/generalized_index.go b/encoding/ssz/query/generalized_index.go index 8c8f894266..9a581c8287 100644 --- a/encoding/ssz/query/generalized_index.go +++ b/encoding/ssz/query/generalized_index.go @@ -14,13 +14,13 @@ const listBaseIndex = 2 // 1. The sszInfo of the root object, to be able to navigate the SSZ structure // 2. The path to the field (e.g., "field_a.field_b[3].field_c") // It walks the path step by step, updating the generalized index at each step. -func GetGeneralizedIndexFromPath(info *SszInfo, path []PathElement) (uint64, error) { +func GetGeneralizedIndexFromPath(info *SszInfo, path Path) (uint64, error) { if info == nil { return 0, errors.New("SszInfo is nil") } // If path is empty, no generalized index can be computed. 
- if len(path) == 0 { + if len(path.Elements) == 0 { return 0, errors.New("cannot compute generalized index for an empty path") } @@ -28,7 +28,7 @@ func GetGeneralizedIndexFromPath(info *SszInfo, path []PathElement) (uint64, err currentIndex := uint64(1) currentInfo := info - for _, pathElement := range path { + for index, pathElement := range path.Elements { element := pathElement // Check that we are in a container to access fields @@ -52,8 +52,8 @@ func GetGeneralizedIndexFromPath(info *SszInfo, path []PathElement) (uint64, err currentIndex = currentIndex*nextPowerOfTwo(chunkCount) + fieldPos currentInfo = fieldSsz - // Check if a path element is a length field - if element.Length { + // Check for length access: element is the last in the path and requests length + if path.Length && index == len(path.Elements)-1 { currentInfo, currentIndex, err = calculateLengthGeneralizedIndex(fieldSsz, element, currentIndex) if err != nil { return 0, fmt.Errorf("length calculation error: %w", err) diff --git a/encoding/ssz/query/generalized_index_test.go b/encoding/ssz/query/generalized_index_test.go index 080cf4a6f9..25792807d4 100644 --- a/encoding/ssz/query/generalized_index_test.go +++ b/encoding/ssz/query/generalized_index_test.go @@ -65,12 +65,6 @@ func TestGetIndicesFromPath_FixedNestedContainer(t *testing.T) { expectedIndex: 3, expectError: false, }, - { - name: "Empty path error", - path: "", - expectError: true, - errorMessage: "empty path", - }, } for _, tc := range testCases { @@ -217,8 +211,8 @@ func TestGetIndicesFromPath_VariableTestContainer(t *testing.T) { expectError: false, }, { - name: "variable_container_list[0].inner_1.len(nested_list_field[3])", - path: "variable_container_list[0].inner_1.len(nested_list_field[3])", + name: "len(variable_container_list[0].inner_1.nested_list_field[3])", + path: "len(variable_container_list[0].inner_1.nested_list_field[3])", expectError: true, errorMessage: "length calculation error: len() is not supported for 
multi-dimensional arrays", }, diff --git a/encoding/ssz/query/path.go b/encoding/ssz/query/path.go index 569c96fdb5..6edd529649 100644 --- a/encoding/ssz/query/path.go +++ b/encoding/ssz/query/path.go @@ -10,51 +10,77 @@ import ( // PathElement represents a single element in a path. type PathElement struct { - Length bool - Name string + Name string // [Optional] Index for List/Vector elements Index *uint64 } -var arrayIndexRegex = regexp.MustCompile(`\[\s*([^\]]+)\s*\]`) +// Path represents the entire path structure for SSZ-QL queries. It consists of multiple PathElements +// and a flag indicating if the path is querying for length. +type Path struct { + // If true, the path is querying for the length of the final element in Elements field + Length bool + // Sequence of path elements representing the navigation through the SSZ structure + Elements []PathElement +} +// Matches an array index expression with a decimal index, like [123], and captures the digits without the brackets. +var arrayIndexRegex = regexp.MustCompile(`\[(\d+)\]`) + +// Matches an entire string that’s a len() call (whitespace flexible), capturing the inner expression and disallowing any trailing characters. var lengthRegex = regexp.MustCompile(`^\s*len\s*\(\s*([^)]+?)\s*\)\s*$`) +// Valid path characters: letters, digits, dot, underscore, square brackets and parentheses only. +// Any other character will render the path invalid. +var validPathChars = regexp.MustCompile(`^[A-Za-z0-9._\[\]\(\)]*$`) + +// Invalid patterns: a closing bracket followed by anything other than '.', '[' or ')', or a closing parenthesis followed by any character +var invalidBracketPattern = regexp.MustCompile(`\][^.\[\)]|\).`) + // ParsePath parses a raw path string into a slice of PathElements. // note: field names are stored in snake case format. rawPath has to be provided in snake case. // 1. Supports dot notation for field access (e.g., "field1.field2"). // 2. Supports array indexing using square brackets (e.g., "array_field[0]"). // 3. 
Supports length access using len() notation (e.g., "len(array_field)"). // 4. Handles leading dots and validates path format. -func ParsePath(rawPath string) ([]PathElement, error) { - rawElements := strings.Split(rawPath, ".") +func ParsePath(rawPath string) (Path, error) { + if err := validateRawPath(rawPath); err != nil { + return Path{}, err + } + + var rawElements []string + var processedPath Path + + matches := lengthRegex.FindStringSubmatch(rawPath) + + // FindStringSubmatch matches a whole string like "len(field_name)" and its inner expression. + // For a path element to be a length query, len(matches) should be 2: + // 1. Full match: "len(field_name)" + // 2. Inner expression: "field_name" + if len(matches) == 2 { + processedPath.Length = true + // If we have found a len() expression, we only want to parse its inner expression. + rawElements = strings.Split(matches[1], ".") + } else { + // Normal path parsing + rawElements = strings.Split(rawPath, ".") + } if rawElements[0] == "" { // Remove leading dot if present rawElements = rawElements[1:] } - var path []PathElement + var pathElements []PathElement for _, elem := range rawElements { if elem == "" { - return nil, errors.New("invalid path: consecutive dots or trailing dot") + return Path{}, errors.New("invalid path: consecutive dots or trailing dot") } // Processing element string processingField := elem var pathElement PathElement - matches := lengthRegex.FindStringSubmatch(processingField) - // FindStringSubmatch matches a whole string like "len(field_name)" and its inner expression. - // For a path element to be a length query, len(matches) should be 2: - // 1. Full match: "len(field_name)" - // 2. 
Inner expression: "field_name" - if len(matches) == 2 { - pathElement.Length = true - // Extract the inner expression between len( and ) and continue parsing on that - processingField = matches[1] - } - // Default name is the full working string (may be updated below if it contains indices) pathElement.Name = processingField @@ -63,22 +89,71 @@ func ParsePath(rawPath string) ([]PathElement, error) { pathElement.Name = extractFieldName(processingField) indices, err := extractArrayIndices(processingField) if err != nil { - return nil, err + return Path{}, err } // Although extractArrayIndices supports multiple indices, // only a single index is supported per PathElement, e.g., "transactions[0]" is valid // while "transactions[0][0]" is rejected explicitly. if len(indices) != 1 { - return nil, fmt.Errorf("multiple indices not supported in token %s", processingField) + return Path{}, fmt.Errorf("multiple indices not supported in token %s", processingField) } pathElement.Index = &indices[0] } - path = append(path, pathElement) + pathElements = append(pathElements, pathElement) } - return path, nil + processedPath.Elements = pathElements + return processedPath, nil +} + +// validateRawPath performs initial validation of the raw path string: +// 1. Rejects invalid characters (only letters, digits, '_', '.', '[]', and '()' are allowed). +// 2. Validates balanced parentheses. +// 3. Validates balanced brackets and that every bracket pair encloses a decimal index. +// 4. Ensures len() calls are only at the start of the path. +// 5. Rejects empty len() calls. +// 6. Rejects invalid patterns like "]a", "]_" or ")x" which indicate malformed paths. +func validateRawPath(rawPath string) error { + // 1. Reject any path containing invalid characters (this includes spaces). + if !validPathChars.MatchString(rawPath) { + return fmt.Errorf("invalid character in path: only letters, digits, '.', '[]' and '()' are allowed") + } + + // 2. 
Basic validation for balanced parentheses: wrongly formatted paths like "test))((" are not rejected in this condition but later. + if strings.Count(rawPath, "(") != strings.Count(rawPath, ")") { + return fmt.Errorf("unmatched parentheses in path: %s", rawPath) + } + + // 3. Basic validation for balanced brackets: + // wrongly formatted paths like "array][0][" are rejected by checking bracket counts and format. + matches := arrayIndexRegex.FindAllStringSubmatch(rawPath, -1) + openBracketsCount := strings.Count(rawPath, "[") + closeBracketsCount := strings.Count(rawPath, "]") + if openBracketsCount != closeBracketsCount { + return fmt.Errorf("unmatched brackets in path: %s", rawPath) + } + if len(matches) != openBracketsCount || len(matches) != closeBracketsCount { + return fmt.Errorf("invalid bracket format in path: %s", rawPath) + } + + // 4. Reject len() calls not at the start of the path + if strings.Index(rawPath, "len(") > 0 { + return fmt.Errorf("len() call must be at the start of the path: %s", rawPath) + } + + // 5. Reject empty len() calls + if strings.Contains(rawPath, "len()") { + return fmt.Errorf("len() call must not be empty: %s", rawPath) + } + + // 6. 
Reject invalid patterns like "][a" or "][_" which indicate malformed paths + if invalidBracketPattern.MatchString(rawPath) { + return fmt.Errorf("invalid path format near brackets in path: %s", rawPath) + } + + return nil } // extractFieldName extracts the field name from a path element name (removes array indices) diff --git a/encoding/ssz/query/path_test.go b/encoding/ssz/query/path_test.go index 62f4a86c43..363aacfa44 100644 --- a/encoding/ssz/query/path_test.go +++ b/encoding/ssz/query/path_test.go @@ -14,199 +14,285 @@ func TestParsePath(t *testing.T) { tests := []struct { name string path string - expected []query.PathElement + expected query.Path wantErr bool }{ + { + name: "simple path", + path: "data", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "data"}, + }, + }, + wantErr: false, + }, + { + name: "simple path beginning with dot", + path: ".data", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "data"}, + }, + }, + wantErr: false, + }, + { + name: "simple path trailing dot", + path: "data.", + wantErr: true, + }, + { + name: "simple path surrounded by dot", + path: ".data.", + wantErr: true, + }, + { + name: "simple path beginning with two dots", + path: "..data", + wantErr: true, + }, { name: "simple nested path", path: "data.target.root", - expected: []query.PathElement{ - {Name: "data"}, - {Name: "target"}, - {Name: "root"}, + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "data"}, + {Name: "target"}, + {Name: "root"}, + }, }, wantErr: false, }, { - name: "simple nested path with leading dot", - path: ".data.target.root", - expected: []query.PathElement{ - {Name: "data"}, - {Name: "target"}, - {Name: "root"}, + name: "len with top-level identifier", + path: "len(data)", + expected: query.Path{ + Length: true, + Elements: []query.PathElement{ + {Name: "data"}, + }, }, wantErr: false, }, { - name: "simple length path with length field", - path: 
"data.target.len(root)", - expected: []query.PathElement{ - {Name: "data"}, - {Name: "target"}, - {Name: "root", Length: true}, + name: "len with top-level identifier and leading dot", + path: "len(.data)", + expected: query.Path{ + Length: true, + Elements: []query.PathElement{ + {Name: "data"}, + }, }, wantErr: false, }, { - name: "len with top-level identifier", - path: "len(data)", - expected: []query.PathElement{{Name: "data", Length: true}}, - wantErr: false, + name: "len with top-level identifier and trailing dot", + path: "len(data.)", + wantErr: true, }, { - name: "length with messy whitespace", - path: "data.target. \tlen ( root ) ", - expected: []query.PathElement{ - {Name: "data"}, - {Name: "target"}, - {Name: "root", Length: true}, + name: "len with top-level identifier beginning dot", + path: ".len(data)", + wantErr: true, + }, + { + name: "len with dotted path inside", + path: "len(data.target.root)", + expected: query.Path{ + Length: true, + Elements: []query.PathElement{ + {Name: "data"}, + {Name: "target"}, + {Name: "root"}, + }, }, wantErr: false, }, { - name: "len with numeric index inside argument", - path: "data.len(a[10])", - expected: []query.PathElement{ - {Name: "data"}, - {Name: "a", Length: true, Index: u64(10)}, + name: "simple length path with non-outer length field", + path: "data.target.len(root)", + wantErr: true, + }, + { + name: "simple path with `len` used as a field name", + path: "data.len", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "data"}, + {Name: "len"}, + }, }, wantErr: false, }, { - name: "array index with spaces", - path: "arr[ 42 ]", - expected: []query.PathElement{{Name: "arr", Index: u64(42)}}, - wantErr: false, - }, - { - name: "array leading zeros", - path: "arr[001]", - expected: []query.PathElement{{Name: "arr", Index: u64(1)}}, - wantErr: false, - }, - { - name: "array max uint64", - path: "arr[18446744073709551615]", - expected: []query.PathElement{{Name: "arr", Index: 
u64(18446744073709551615)}}, - wantErr: false, - }, - { - name: "len with dotted path inside - no input validation - reverts at a later stage", - path: "len(data.target.root)", - expected: []query.PathElement{{Name: "len(data", Length: false}, {Name: "target", Length: false}, {Name: "root)", Length: false}}, - wantErr: false, - }, - { - name: "len with dotted path then more - no input validation - reverts at a later stage", - path: "len(data.target.root).foo", - expected: []query.PathElement{{Name: "len(data", Length: false}, {Name: "target", Length: false}, {Name: "root)", Length: false}, {Name: "foo", Length: false}}, - wantErr: false, - }, - { - name: "len without closing paren - no input validation - reverts at a later stage", - path: "len(root", - expected: []query.PathElement{{Name: "len(root"}}, - wantErr: false, - }, - { - name: "len with extra closing paren - no input validation - reverts at a later stage", - path: "len(root))", - expected: []query.PathElement{{Name: "len(root))"}}, - wantErr: false, - }, - { - name: "empty len argument - no input validation - reverts at a later stage", - path: "len()", - expected: []query.PathElement{{Name: "len()"}}, - wantErr: false, - }, - { - name: "len with comma-separated args - no input validation - reverts at a later stage", - path: "len(a,b)", - expected: []query.PathElement{{Name: "a,b", Length: true}}, - wantErr: false, - }, - { - name: "len call followed by index (outer) - no input validation - reverts at a later stage", - path: "data.len(root)[0]", - expected: []query.PathElement{ - {Name: "data"}, - {Name: "len(root)", Index: u64(0)}, + name: "simple path with `len` used as a field name + trailing field", + path: "data.len.value", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "data"}, + {Name: "len"}, + {Name: "value"}, + }, }, wantErr: false, }, { - name: "cannot provide consecutive dots in raw path", - path: "data..target.root", + name: "simple path with `len`", + path: 
"len.len", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "len"}, + {Name: "len"}, + }, + }, + wantErr: false, + }, + { + name: "simple length path with length field", + path: "len.len(root)", wantErr: true, }, { - name: "cannot provide a negative index in array path", - path: ".data.target.root[-1]", + name: "empty length field", + path: "len()", wantErr: true, }, { - name: "invalid index in array path", - path: ".data.target.root[a]", + name: "length field not terminal", + path: "len(data).foo", wantErr: true, }, { - name: "multidimensional array index in path", - path: ".data.target.root[0][1]", + name: "length field with missing closing paren", + path: "len(data", wantErr: true, }, { - name: "leading double dot", - path: "..data", - expected: nil, - wantErr: true, - }, - { - name: "trailing dot", - path: "data.target.", - expected: nil, - wantErr: true, - }, - { - name: "len with inner bracket non-numeric index", - path: "data.len(a[b])", + name: "length field with two closing paren", + path: "len(data))", wantErr: true, }, { - name: "array empty index", - path: "arr[]", + name: "len with comma-separated args", + path: "len(a,b)", wantErr: true, }, { - name: "array hex index", - path: "arr[0x10]", + name: "array index path", + path: "arr[42]", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "arr", Index: u64(42)}, + }, + }, + wantErr: false, + }, + { + name: "array index path with max uint64", + path: "arr[18446744073709551615]", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "arr", Index: u64(18446744073709551615)}, + }, + }, + wantErr: false, + }, + { + name: "array element in wrong nested path", + path: "arr[42]foo", wantErr: true, }, { - name: "array missing closing bracket", - path: "arr[12", + name: "array index in nested path", + path: "arr[42].foo", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "arr", Index: 
u64(42)}, + {Name: "foo"}, + }, + }, + wantErr: false, + }, + { + name: "array index in deeper nested path", + path: "arr[42].foo.bar[10]", + expected: query.Path{ + Length: false, + Elements: []query.PathElement{ + {Name: "arr", Index: u64(42)}, + {Name: "foo"}, + {Name: "bar", Index: u64(10)}, + }, + }, + wantErr: false, + }, + { + name: "length of array element", + path: "len(arr[42])", + expected: query.Path{ + Length: true, + Elements: []query.PathElement{ + {Name: "arr", Index: u64(42)}, + }, + }, + wantErr: false, + }, + { + name: "length of array + trailing item", + path: "len(arr)[0]", wantErr: true, }, { - name: "array plus sign index", - path: "arr[+3]", + name: "length of nested path within array element", + path: "len(arr[42].foo)", + expected: query.Path{ + Length: true, + Elements: []query.PathElement{ + {Name: "arr", Index: u64(42)}, + {Name: "foo"}, + }, + }, + wantErr: false, + }, + { + name: "empty spaces in path", + path: "data . target", wantErr: true, }, { - name: "array unicode digits", - path: "arr[12]", + name: "leading dot + empty spaces", + path: ". data", wantErr: true, }, { - name: "array overflow uint64", - path: "arr[18446744073709551616]", + name: "length with leading dot + empty spaces", + path: "len(. 
data)", wantErr: true, }, { - name: "array index then suffix", - path: "field[1]suffix", - expected: []query.PathElement{{Name: "field", Index: u64(1)}}, - wantErr: false, + name: "Empty path error", + path: "", + expected: query.Path{}, + }, + { + name: "length with leading dot + empty spaces", + path: "test))((", + wantErr: true, + }, + { + name: "length with leading dot + empty spaces", + path: "array][0][", + wantErr: true, }, } @@ -220,7 +306,7 @@ func TestParsePath(t *testing.T) { } require.NoError(t, err) - require.Equal(t, len(tt.expected), len(parsedPath), "Expected %d path elements, got %d", len(tt.expected), len(parsedPath)) + require.Equal(t, len(tt.expected.Elements), len(parsedPath.Elements), "Expected %d path elements, got %d", len(tt.expected.Elements), len(parsedPath.Elements)) require.DeepEqual(t, tt.expected, parsedPath, "Parsed path does not match expected path") }) } diff --git a/encoding/ssz/query/query.go b/encoding/ssz/query/query.go index 8b90788eed..44467b5e63 100644 --- a/encoding/ssz/query/query.go +++ b/encoding/ssz/query/query.go @@ -7,19 +7,19 @@ import ( // CalculateOffsetAndLength calculates the offset and length of a given path within the SSZ object. // By walking the given path, it accumulates the offsets based on SszInfo. 
-func CalculateOffsetAndLength(sszInfo *SszInfo, path []PathElement) (*SszInfo, uint64, uint64, error) { +func CalculateOffsetAndLength(sszInfo *SszInfo, path Path) (*SszInfo, uint64, uint64, error) { if sszInfo == nil { return nil, 0, 0, errors.New("sszInfo is nil") } - if len(path) == 0 { + if len(path.Elements) == 0 { return nil, 0, 0, errors.New("path is empty") } walk := sszInfo offset := uint64(0) - for pathIndex, elem := range path { + for pathIndex, elem := range path.Elements { containerInfo, err := walk.ContainerInfo() if err != nil { return nil, 0, 0, fmt.Errorf("could not get field infos: %w", err) @@ -56,7 +56,7 @@ func CalculateOffsetAndLength(sszInfo *SszInfo, path []PathElement) (*SszInfo, u // to the next field's sszInfo, which would have the correct size information. // However, if this is the last element in the path, we need to ensure we return the correct size // for the indexed element. Hence, we return the size from elementSizes. - if pathIndex == len(path)-1 { + if pathIndex == len(path.Elements)-1 { return walk, offset, listInfo.elementSizes[index], nil } } else {