Add native OnigScanner, which handles matching multiple regexes on a line

This cuts the tokenization time from 250ms to 70ms by avoiding js <-> native overhead
This commit is contained in:
Nathan Sobo
2012-09-27 12:55:39 -06:00
parent e9e0e24ce3
commit 32e36700ae
6 changed files with 164 additions and 2 deletions

View File

@@ -285,6 +285,8 @@
'native/v8_extensions/native.h',
'native/v8_extensions/onig_reg_exp.mm',
'native/v8_extensions/onig_reg_exp.h',
'native/v8_extensions/onig_scanner.mm',
'native/v8_extensions/onig_scanner.h',
'native/v8_extensions/atom.mm',
'native/v8_extensions/atom.h',
],

View File

@@ -2,6 +2,7 @@
#import "native/v8_extensions/atom.h"
#import "native/v8_extensions/native.h"
#import "native/v8_extensions/onig_reg_exp.h"
#import "native/v8_extensions/onig_scanner.h"
#import "native/message_translation.h"
#include <iostream>
@@ -9,6 +10,7 @@ void AtomCefRenderProcessHandler::OnWebKitInitialized() {
new v8_extensions::Atom();
new v8_extensions::Native();
new v8_extensions::OnigRegExp();
new v8_extensions::OnigScanner();
}
void AtomCefRenderProcessHandler::OnContextCreated(CefRefPtr<CefBrowser> browser,

View File

@@ -0,0 +1,20 @@
#include "include/cef_base.h"
#include "include/cef_v8.h"
namespace v8_extensions {

// V8 extension handler that backs the JavaScript `OnigScanner` object.
// Construction registers the extension; Execute() dispatches the native
// functions declared by the extension's JavaScript source.
class OnigScanner : public CefV8Handler {
 public:
  OnigScanner();

  // Invoked by CEF when one of the extension's native functions is called.
  // `name` is the native function name, `object` the receiver, `arguments`
  // the call arguments. The result is written to `retval`; an error message
  // may be written to `exception`. Returns true when the call was handled.
  virtual bool Execute(const CefString& name,
                       CefRefPtr<CefV8Value> object,
                       const CefV8ValueList& arguments,
                       CefRefPtr<CefV8Value>& retval,
                       CefString& exception) OVERRIDE;

  // Provide the reference counting implementation for this class.
  // The macro argument must name the enclosing class.
  IMPLEMENT_REFCOUNTING(OnigScanner);
};

}

View File

@@ -0,0 +1,17 @@
(function() {
native function buildScanner(sources);
native function findNextMatch(string, startPosition);
function OnigScanner(sources) {
var scanner = buildScanner(sources);
scanner.constructor = OnigScanner;
scanner.__proto__ = OnigScanner.prototype;
scanner.sources = sources;
return scanner;
}
OnigScanner.prototype.buildScanner = buildScanner;
OnigScanner.prototype.findNextMatch = findNextMatch;
this.OnigScanner = OnigScanner;
})();

View File

@@ -0,0 +1,116 @@
#import <Cocoa/Cocoa.h>
#import <iostream>
#import "CocoaOniguruma/OnigRegexp.h"
#import "include/cef_base.h"
#import "include/cef_v8.h"
#import "onig_scanner.h"
namespace v8_extensions {
extern NSString *stringFromCefV8Value(const CefRefPtr<CefV8Value>& value);
using namespace std;
// Native state attached to a JavaScript scanner object. Holds one compiled
// OnigRegexp per pattern source and searches all of them for the match that
// starts earliest in a string.
class OnigScannerUserData : public CefBase {
 public:
  // Compiles each element of the V8 array `sources` into an OnigRegexp.
  // Every compiled regex is retained here and released in the destructor.
  OnigScannerUserData(CefRefPtr<CefV8Value> sources) {
    int length = sources->GetArrayLength();
    regExps.resize(length);
    for (int i = 0; i < length; i++) {
      NSString *sourceString = stringFromCefV8Value(sources->GetValue(i));
      regExps[i] = [[OnigRegexp compile:sourceString] retain];
    }
  }

  // Balances the retain taken in the constructor; without this every scanner
  // would leak its compiled regexes. Messaging nil entries is a no-op.
  ~OnigScannerUserData() {
    for (std::vector<OnigRegexp *>::iterator iter = regExps.begin(); iter != regExps.end(); ++iter) {
      [*iter release];
    }
  }

  // Flattens `result` into a V8 array holding three ints per capture:
  // the capture index, its start offset and its end offset (start + length).
  CefRefPtr<CefV8Value> CaptureIndicesForMatch(OnigResult *result) {
    int resultCount = [result count];
    CefRefPtr<CefV8Value> array = CefV8Value::CreateArray(resultCount * 3);
    int i = 0;
    for (int index = 0; index < resultCount; index++) {
      int captureLength = [result lengthAt:index];
      int captureStart = [result locationAt:index];
      array->SetValue(i++, CefV8Value::CreateInt(index));
      array->SetValue(i++, CefV8Value::CreateInt(captureStart));
      array->SetValue(i++, CefV8Value::CreateInt(captureStart + captureLength));
    }
    return array;
  }

  // Searches `v8String` from `v8StartLocation` with every regex and picks the
  // match with the smallest start location (the earliest regex wins ties,
  // because only a strictly smaller location replaces the current best).
  // Returns an object with `index` (position of the winning regex) and
  // `captureIndices` (see CaptureIndicesForMatch), or a V8 null when no
  // regex matches.
  CefRefPtr<CefV8Value> FindNextMatch(CefRefPtr<CefV8Value> v8String, CefRefPtr<CefV8Value> v8StartLocation) {
    NSString *string = stringFromCefV8Value(v8String);
    int startLocation = v8StartLocation->GetIntValue();
    int bestIndex = -1;
    int bestLocation = 0;
    OnigResult *bestResult = nil;

    int regExpCount = (int)regExps.size();
    for (int index = 0; index < regExpCount; index++) {
      OnigResult *result = [regExps[index] search:string start:startLocation];
      if ([result count] > 0) {
        int location = [result locationAt:0];
        if (bestIndex == -1 || location < bestLocation) {
          bestLocation = location;
          bestResult = result;
          bestIndex = index;
        }
        // Stop scanning the remaining regexes once a match begins exactly
        // at the start location.
        if (location == startLocation) break;
      }
    }

    if (bestIndex >= 0) {
      CefRefPtr<CefV8Value> result = CefV8Value::CreateObject(NULL);
      result->SetValue("index", CefV8Value::CreateInt(bestIndex), V8_PROPERTY_ATTRIBUTE_NONE);
      result->SetValue("captureIndices", CaptureIndicesForMatch(bestResult), V8_PROPERTY_ATTRIBUTE_NONE);
      return result;
    } else {
      return CefV8Value::CreateNull();
    }
  }

 protected:
  // Compiled regexes, in the same order as the sources passed to the constructor.
  std::vector<OnigRegexp *> regExps;

  IMPLEMENT_REFCOUNTING(OnigScannerUserData);
};
// Loads v8_extensions/onig_scanner.js from the main bundle's resources and
// registers it as the "v8/onig-scanner" extension with this object as handler.
OnigScanner::OnigScanner() : CefV8Handler() {
  NSString *filePath = [[[NSBundle mainBundle] resourcePath] stringByAppendingPathComponent:@"v8_extensions/onig_scanner.js"];
  NSError *error = nil;
  NSString *extensionCode = [NSString stringWithContentsOfFile:filePath encoding:NSUTF8StringEncoding error:&error];
  if (!extensionCode) {
    // Without the source there is nothing to register; report the failure
    // instead of handing a NULL C string to CefRegisterExtension.
    NSLog(@"OnigScanner: unable to load extension source at %@: %@", filePath, error);
    return;
  }
  CefRegisterExtension("v8/onig-scanner", [extensionCode UTF8String], this);
}
// Dispatches the extension's native functions.
//   buildScanner(sources)               -> new object carrying an
//                                          OnigScannerUserData built from sources
//   findNextMatch(string, startPosition) -> result of FindNextMatch on the
//                                          receiver's OnigScannerUserData
// Missing arguments or a receiver without scanner data set `exception`
// (raising a JavaScript error) rather than dereferencing invalid memory.
// Returns false for any other function name.
bool OnigScanner::Execute(const CefString& name,
                          CefRefPtr<CefV8Value> object,
                          const CefV8ValueList& arguments,
                          CefRefPtr<CefV8Value>& retval,
                          CefString& exception) {
  if (name == "findNextMatch") {
    if (arguments.size() < 2) {
      exception = "findNextMatch requires a string and a start position";
      return true;
    }

    // Keep the ref pointer alive for the duration of the call.
    CefRefPtr<CefBase> base = object->GetUserData();
    OnigScannerUserData *userData = static_cast<OnigScannerUserData *>(base.get());
    if (!userData) {
      exception = "findNextMatch called on an object that was not created by buildScanner";
      return true;
    }

    retval = userData->FindNextMatch(arguments[0], arguments[1]);
    return true;
  }
  else if (name == "buildScanner") {
    if (arguments.empty()) {
      exception = "buildScanner requires an array of pattern sources";
      return true;
    }

    retval = CefV8Value::CreateObject(NULL);
    retval->SetUserData(new OnigScannerUserData(arguments[0]));
    return true;
  }

  return false;
}
} // namespace v8_extensions

View File

@@ -85,11 +85,14 @@ class Rule
@allPatterns.push(pattern.getIncludedPatterns(included)...)
@allPatterns
# Returns the scanner for this rule, creating it on first use from the
# 'regexSource' of every included pattern and caching it in @scanner.
getScanner: ->
  return @scanner if @scanner?
  regexSources = _.pluck(@getIncludedPatterns(), 'regexSource')
  @scanner = new OnigScanner(regexSources)
getNextTokens: (stack, line, position) ->
patterns = @getIncludedPatterns()
{index, captureIndices} = OnigRegExp.captureIndices(line, position, patterns.map (p) -> p.regex )
return {} unless index?
return {} unless result = @getScanner().findNextMatch(line, position)
{ index, captureIndices } = result
[firstCaptureIndex, firstCaptureStart, firstCaptureEnd] = captureIndices
nextTokens = patterns[index].handleMatch(stack, line, captureIndices)
@@ -120,9 +123,11 @@ class Pattern
@match = match
else
@regex = new OnigRegExp(match)
@regexSource = match
@captures = captures
else if begin
@regex = new OnigRegExp(begin)
@regexSource = begin
@captures = beginCaptures ? captures
endPattern = new Pattern(@grammar, { match: end, captures: endCaptures ? captures, popRule: true})
@pushRule = new Rule(@grammar, { @scopeName, patterns, endPattern })