Add TextMateGrammar.buildCaptureTree, which organizes nested captures in a tree

This will be a useful tool for dealing with capture groups that nest within other capture groups, and it also helps deal with trailing lookahead groups that don't belong in the main match. I made it a class method because it's stateless, which makes it easier to test.
2026-01-23 22:08:08 -05:00 · 2012-08-03 23:52:58 -06:00
parent fd28a3577a
commit 32fc042929
2 changed files with 52 additions and 0 deletions
--- a/spec/app/text-mate-grammar-spec.coffee
+++ b/spec/app/text-mate-grammar-spec.coffee
@@ -127,3 +127,35 @@ describe "TextMateGrammar", ->
        expect(tokens[6]).toEqual value: '"',  scopes: ["source.coffee","string.quoted.double.coffee","source.coffee.embedded.source","string.quoted.double.coffee","punctuation.definition.string.end.coffee"]
        expect(tokens[7]).toEqual value: '}',  scopes: ["source.coffee","string.quoted.double.coffee","source.coffee.embedded.source","punctuation.section.embedded.coffee"]
        expect(tokens[8]).toEqual value: '"',  scopes: ["source.coffee","string.quoted.double.coffee","punctuation.definition.string.end.coffee"]
+
+  describe "@buildCaptureTree(captures, startPositions)", ->
+    it "converts a match array into a tree based on the nesting of its capture groups", ->
+      # The example has multiple nested capture groups, with one lookahead group on the end -- [ij] -- that
+      # is not included as part of the overall match and is therefore excluded from the tree:
+      # (a((bc)d)e(f(g)(h)))[ij]   <- the outermost parentheses stand for the overall match (index 0)
+      match = ["abcdefgh", "bcd", "bc", "fgh", "g", "h", "ij"]
+      startPositions = [0, 1, 1, 5, 6, 7, 8]
+
+      tree = TextMateGrammar.buildCaptureTree(match, startPositions)
+      expect(tree).toEqual
+        text: "abcdefgh"
+        index: 0
+        position: 0
+        captures: [
+          {
+            text: "bcd"
+            index: 1
+            position: 1
+            captures: [{ text: "bc", index: 2, position: 1 }]
+          },
+          {
+            text: "fgh"
+            index: 3
+            position: 5
+            captures: [
+              { text: "g", index: 4, position: 6 }
+              { text: "h", index: 5, position: 7 }
+            ]
+          }
+        ]
+
--- a/src/app/text-mate-grammar.coffee
+++ b/src/app/text-mate-grammar.coffee
@@ -54,6 +54,22 @@ class TextMateGrammar
    else if name == "$self"
      @initialRule

+  # Organizes a flat list of regex captures into a tree that mirrors how the
+  # capture groups nest inside one another.
+  #
+  # captures       - Array of captured strings; the first entry is the overall
+  #                  match and the rest are its capture groups, in order.
+  # startPositions - Array of start offsets, parallel to `captures`.
+  # totalCaptures  - Number of captures in the complete match, used to compute
+  #                  each node's capture index (default: captures.length).
+  #
+  # Neither input array is modified. A capture that starts at or after the end
+  # of the overall match (e.g. a trailing lookahead group) is left out of the
+  # tree.
+  #
+  # Returns an Object with `index`, `text` and `position` keys, plus a
+  # `captures` Array of child nodes when other captures nest inside it.
+  @buildCaptureTree: (captures, startPositions, totalCaptures=captures.length) ->
+    # Work on copies so the caller's match data survives the call.
+    remainingCaptures = captures.slice()
+    remainingPositions = startPositions.slice()
+
+    buildNode = ->
+      index = totalCaptures - remainingCaptures.length
+      # A group that did not participate in the match has no text; treat it
+      # as empty instead of throwing on `.length`.
+      text = remainingCaptures.shift() ? ''
+      position = remainingPositions.shift()
+      endPosition = position + text.length
+
+      node = { index, text, position }
+
+      # Every following capture that starts before this one ends nests in it.
+      children = []
+      while remainingPositions.length and remainingPositions[0] < endPosition
+        children.push(buildNode())
+
+      node.captures = children if children.length
+      node
+
+    buildNode()
+
+
 class Rule
  grammar: null
  scopeName: null
@@ -136,6 +152,10 @@ class Pattern
    tokens = []
    previousCaptureEndPosition = 0

+    console.log match
+    console.log match.indices
+    console.log @captures
+
    for captureIndex in _.keys(@captures)
      currentCaptureText = match[captureIndex]
      continue unless currentCaptureText.length