From 32fc042929581d5215cc7973bd6f2e37e79e04f1 Mon Sep 17 00:00:00 2001
From: Nathan Sobo <nathan@github.com>
Date: Fri, 3 Aug 2012 23:52:58 -0600
Subject: [PATCH] Add TextMateGrammar.buildCaptureTree, which organizes nested
 captures in a tree

This gives us a tool for handling capture groups that nest within other capture groups, and it also helps with trailing lookahead groups that don't belong to the main match. I made it a class method because it's stateless, which also made it easier to test.
---
 spec/app/text-mate-grammar-spec.coffee | 32 ++++++++++++++++++++++++++
 src/app/text-mate-grammar.coffee       | 20 ++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/spec/app/text-mate-grammar-spec.coffee b/spec/app/text-mate-grammar-spec.coffee
index 2aa07bfdf..322d199bb 100644
--- a/spec/app/text-mate-grammar-spec.coffee
+++ b/spec/app/text-mate-grammar-spec.coffee
@@ -127,3 +127,35 @@ describe "TextMateGrammar", ->
         expect(tokens[6]).toEqual value: '"',  scopes: ["source.coffee","string.quoted.double.coffee","source.coffee.embedded.source","string.quoted.double.coffee","punctuation.definition.string.end.coffee"]
         expect(tokens[7]).toEqual value: '}',  scopes: ["source.coffee","string.quoted.double.coffee","source.coffee.embedded.source","punctuation.section.embedded.coffee"]
         expect(tokens[8]).toEqual value: '"',  scopes: ["source.coffee","string.quoted.double.coffee","punctuation.definition.string.end.coffee"]
+
+  describe "@buildCaptureTree(captures, startPositions)", ->
+    it "converts a match array into a tree based on the nesting of its capture groups", ->
+      # The example has several nested capture groups plus one trailing lookahead group -- [ij] -- that
+      # starts at the end of the overall match (position 8) and is therefore excluded from the tree.
+      # Parens mark capture groups, the outermost pair being the overall match: (a((bc)d)e(f(g)(h)))[ij]
+      match = ["abcdefgh", "bcd", "bc", "fgh", "g", "h", "ij"]
+      startPositions = [0, 1, 1, 5, 6, 7, 8]
+
+      tree = TextMateGrammar.buildCaptureTree(match, startPositions)
+      expect(tree).toEqual
+        text: "abcdefgh"
+        index: 0
+        position: 0
+        captures: [
+          {
+            text: "bcd"
+            index: 1
+            position: 1
+            captures: [{ text: "bc", index: 2, position: 1 }]
+          },
+          {
+            text: "fgh"
+            index: 3
+            position: 5
+            captures: [
+              { text: "g", index: 4, position: 6 }
+              { text: "h", index: 5, position: 7 }
+            ]
+          }
+        ]
+
diff --git a/src/app/text-mate-grammar.coffee b/src/app/text-mate-grammar.coffee
index 23e54073f..846c5fc25 100644
--- a/src/app/text-mate-grammar.coffee
+++ b/src/app/text-mate-grammar.coffee
@@ -54,6 +54,22 @@ class TextMateGrammar
     else if name == "$self"
       @initialRule
 
+  # Builds a tree of { index, text, position } nodes from a flat match array: captures[i] is the text
+  # of group i and startPositions[i] its start offset. A group nests under the closest earlier group
+  # whose span it starts inside; building stops at the first group that starts at or past the end of
+  # captures[0] (e.g. a trailing lookahead). `captures` is set only on nodes that have children.
+  @buildCaptureTree: (captures, startPositions, totalCaptures=captures.length) ->
+    offset = totalCaptures - captures.length # shifts reported indices when totalCaptures is passed
+    cursor = 0 # read position; the arguments are never shifted, so callers keep their arrays intact
+    build = ->
+      i = cursor++
+      node = { index: offset + i, text: captures[i], position: startPositions[i] }
+      endPosition = node.position + node.text.length
+      children = (build() while startPositions[cursor] < endPosition)
+      node.captures = children if children.length
+      node
+    build()
+
 class Rule
   grammar: null
   scopeName: null
@@ -136,6 +152,10 @@ class Pattern
     tokens = []
     previousCaptureEndPosition = 0
 
+    console.log match
+    console.log match.indices
+    console.log @captures
+
     for captureIndex in _.keys(@captures)
       currentCaptureText = match[captureIndex]
       continue unless currentCaptureText.length