mirror of
https://github.com/atom/atom.git
synced 2026-04-06 03:02:13 -04:00
Add native OnigScanner, which handles matching multiple regexes on a line
This cuts the tokenization time from 250ms to 70ms by avoiding js <-> native overhead
This commit is contained in:
2
atom.gyp
2
atom.gyp
@@ -285,6 +285,8 @@
|
||||
'native/v8_extensions/native.h',
|
||||
'native/v8_extensions/onig_reg_exp.mm',
|
||||
'native/v8_extensions/onig_reg_exp.h',
|
||||
'native/v8_extensions/onig_scanner.mm',
|
||||
'native/v8_extensions/onig_scanner.h',
|
||||
'native/v8_extensions/atom.mm',
|
||||
'native/v8_extensions/atom.h',
|
||||
],
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#import "native/v8_extensions/atom.h"
|
||||
#import "native/v8_extensions/native.h"
|
||||
#import "native/v8_extensions/onig_reg_exp.h"
|
||||
#import "native/v8_extensions/onig_scanner.h"
|
||||
#import "native/message_translation.h"
|
||||
#include <iostream>
|
||||
|
||||
@@ -9,6 +10,7 @@ void AtomCefRenderProcessHandler::OnWebKitInitialized() {
|
||||
new v8_extensions::Atom();
|
||||
new v8_extensions::Native();
|
||||
new v8_extensions::OnigRegExp();
|
||||
new v8_extensions::OnigScanner();
|
||||
}
|
||||
|
||||
void AtomCefRenderProcessHandler::OnContextCreated(CefRefPtr<CefBrowser> browser,
|
||||
|
||||
20
native/v8_extensions/onig_scanner.h
Normal file
20
native/v8_extensions/onig_scanner.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#include "include/cef_base.h"
|
||||
#include "include/cef_v8.h"
|
||||
|
||||
namespace v8_extensions {
|
||||
|
||||
class OnigScanner : public CefV8Handler {
|
||||
public:
|
||||
OnigScanner();
|
||||
|
||||
virtual bool Execute(const CefString& name,
|
||||
CefRefPtr<CefV8Value> object,
|
||||
const CefV8ValueList& arguments,
|
||||
CefRefPtr<CefV8Value>& retval,
|
||||
CefString& exception) OVERRIDE;
|
||||
|
||||
// Provide the reference counting implementation for this class.
|
||||
IMPLEMENT_REFCOUNTING(OnigRegExp);
|
||||
};
|
||||
|
||||
}
|
||||
17
native/v8_extensions/onig_scanner.js
Normal file
17
native/v8_extensions/onig_scanner.js
Normal file
@@ -0,0 +1,17 @@
|
||||
(function() {
|
||||
native function buildScanner(sources);
|
||||
native function findNextMatch(string, startPosition);
|
||||
|
||||
function OnigScanner(sources) {
|
||||
var scanner = buildScanner(sources);
|
||||
scanner.constructor = OnigScanner;
|
||||
scanner.__proto__ = OnigScanner.prototype;
|
||||
scanner.sources = sources;
|
||||
return scanner;
|
||||
}
|
||||
|
||||
OnigScanner.prototype.buildScanner = buildScanner;
|
||||
OnigScanner.prototype.findNextMatch = findNextMatch;
|
||||
|
||||
this.OnigScanner = OnigScanner;
|
||||
})();
|
||||
116
native/v8_extensions/onig_scanner.mm
Normal file
116
native/v8_extensions/onig_scanner.mm
Normal file
@@ -0,0 +1,116 @@
|
||||
#import <Cocoa/Cocoa.h>
|
||||
#import <iostream>
|
||||
#import "CocoaOniguruma/OnigRegexp.h"
|
||||
#import "include/cef_base.h"
|
||||
#import "include/cef_v8.h"
|
||||
#import "onig_scanner.h"
|
||||
|
||||
namespace v8_extensions {
|
||||
|
||||
extern NSString *stringFromCefV8Value(const CefRefPtr<CefV8Value>& value);
|
||||
using namespace std;
|
||||
|
||||
// Native state attached (as CEF user data) to the JavaScript scanner object.
// Owns one compiled OnigRegexp per source pattern, in source order.
class OnigScannerUserData : public CefBase {
public:
  // Compiles every element of the `sources` array. Each compiled regexp is
  // retained here and released in the destructor.
  OnigScannerUserData(CefRefPtr<CefV8Value> sources) {
    int length = sources->GetArrayLength();

    regExps.resize(length);
    for (int i = 0; i < length; i++) {
      NSString *sourceString = stringFromCefV8Value(sources->GetValue(i));
      regExps[i] = [[OnigRegexp compile:sourceString] retain];
    }
  }

  // Balances the -retain performed in the constructor; without this every
  // scanner leaks all of its compiled regexps.
  ~OnigScannerUserData() {
    for (std::vector<OnigRegexp *>::iterator iter = regExps.begin(); iter != regExps.end(); ++iter) {
      [*iter release];  // messaging nil is a no-op, so empty slots are fine
    }
  }

  // Flattens `result` into a JavaScript array of triples:
  // [captureIndex, captureStart, captureEnd, ...], one triple per capture.
  CefRefPtr<CefV8Value> CaptureIndicesForMatch(OnigResult *result) {
    int resultCount = [result count];
    CefRefPtr<CefV8Value> array = CefV8Value::CreateArray(resultCount * 3);
    int i = 0;

    for (int index = 0; index < resultCount; index++) {
      int captureLength = [result lengthAt:index];
      int captureStart = [result locationAt:index];

      array->SetValue(i++, CefV8Value::CreateInt(index));
      array->SetValue(i++, CefV8Value::CreateInt(captureStart));
      array->SetValue(i++, CefV8Value::CreateInt(captureStart + captureLength));
    }

    return array;
  }

  // Searches `v8String` from `v8StartLocation` with every regexp and picks
  // the match that starts earliest; on a tie the regexp that comes first in
  // the source list wins. Returns an object with `index` (position of the
  // winning regexp) and `captureIndices` (see CaptureIndicesForMatch), or a
  // JavaScript null when no regexp matches.
  CefRefPtr<CefV8Value> FindNextMatch(CefRefPtr<CefV8Value> v8String, CefRefPtr<CefV8Value> v8StartLocation) {
    NSString *string = stringFromCefV8Value(v8String);
    int startLocation = v8StartLocation->GetIntValue();

    int bestIndex = -1;
    int bestLocation = 0;  // only meaningful once bestIndex >= 0
    OnigResult *bestResult = nil;

    int regExpCount = (int)regExps.size();
    for (int index = 0; index < regExpCount; index++) {
      OnigResult *result = [regExps[index] search:string start:startLocation];
      if ([result count] == 0) continue;

      int location = [result locationAt:0];
      if (bestIndex == -1 || location < bestLocation) {
        bestLocation = location;
        bestResult = result;
        bestIndex = index;
      }

      // Nothing can start earlier than the search start, so stop looking.
      if (location == startLocation) break;
    }

    if (bestIndex < 0) {
      return CefV8Value::CreateNull();
    }

    CefRefPtr<CefV8Value> result = CefV8Value::CreateObject(NULL);
    result->SetValue("index", CefV8Value::CreateInt(bestIndex), V8_PROPERTY_ATTRIBUTE_NONE);
    result->SetValue("captureIndices", CaptureIndicesForMatch(bestResult), V8_PROPERTY_ATTRIBUTE_NONE);
    return result;
  }

protected:
  // Compiled patterns, retained; index i corresponds to sources[i].
  std::vector<OnigRegexp *> regExps;

  IMPLEMENT_REFCOUNTING(OnigScannerUserData);
};
|
||||
|
||||
// Loads v8_extensions/onig_scanner.js from the main bundle's resources and
// registers it as the "v8/onig-scanner" extension, with this object as the
// handler for its native functions.
OnigScanner::OnigScanner() : CefV8Handler() {
  NSString *filePath = [[[NSBundle mainBundle] resourcePath] stringByAppendingPathComponent:@"v8_extensions/onig_scanner.js"];
  NSError *error = nil;
  NSString *extensionCode = [NSString stringWithContentsOfFile:filePath encoding:NSUTF8StringEncoding error:&error];

  // A nil string would hand a NULL C string to CefRegisterExtension, so
  // report the failure and skip registration instead.
  if (!extensionCode) {
    NSLog(@"OnigScanner: failed to load %@: %@", filePath, error);
    return;
  }

  CefRegisterExtension("v8/onig-scanner", [extensionCode UTF8String], this);
}
|
||||
|
||||
|
||||
// Handles the native functions of the onig-scanner extension.
//   "buildScanner"  - creates an object carrying an OnigScannerUserData built
//                     from arguments[0] (the array of regexp sources).
//   "findNextMatch" - runs FindNextMatch(arguments[0], arguments[1]) on the
//                     user data attached to the receiver `object`.
// Invalid calls set `exception` rather than reading missing arguments or
// dereferencing absent user data. Returns false for any other name.
bool OnigScanner::Execute(const CefString& name,
                          CefRefPtr<CefV8Value> object,
                          const CefV8ValueList& arguments,
                          CefRefPtr<CefV8Value>& retval,
                          CefString& exception) {
  if (name == "findNextMatch") {
    if (arguments.size() < 2) {
      exception = "findNextMatch requires a string and a start position";
      return true;
    }

    // The receiver only carries user data if it came from buildScanner.
    OnigScannerUserData *userData = (OnigScannerUserData *)object->GetUserData().get();
    if (!userData) {
      exception = "findNextMatch must be called on an object created by buildScanner";
      return true;
    }

    retval = userData->FindNextMatch(arguments[0], arguments[1]);
    return true;
  }
  else if (name == "buildScanner") {
    if (arguments.size() < 1) {
      exception = "buildScanner requires an array of regular expression sources";
      return true;
    }

    retval = CefV8Value::CreateObject(NULL);
    retval->SetUserData(new OnigScannerUserData(arguments[0]));
    return true;
  }

  return false;
}
|
||||
|
||||
} // namespace v8_extensions
|
||||
@@ -85,11 +85,14 @@ class Rule
|
||||
@allPatterns.push(pattern.getIncludedPatterns(included)...)
|
||||
@allPatterns
|
||||
|
||||
getScanner: ->
|
||||
@scanner ?= new OnigScanner(_.pluck(@getIncludedPatterns(), 'regexSource'))
|
||||
|
||||
getNextTokens: (stack, line, position) ->
|
||||
patterns = @getIncludedPatterns()
|
||||
{index, captureIndices} = OnigRegExp.captureIndices(line, position, patterns.map (p) -> p.regex )
|
||||
|
||||
return {} unless index?
|
||||
return {} unless result = @getScanner().findNextMatch(line, position)
|
||||
{ index, captureIndices } = result
|
||||
|
||||
[firstCaptureIndex, firstCaptureStart, firstCaptureEnd] = captureIndices
|
||||
nextTokens = patterns[index].handleMatch(stack, line, captureIndices)
|
||||
@@ -120,9 +123,11 @@ class Pattern
|
||||
@match = match
|
||||
else
|
||||
@regex = new OnigRegExp(match)
|
||||
@regexSource = match
|
||||
@captures = captures
|
||||
else if begin
|
||||
@regex = new OnigRegExp(begin)
|
||||
@regexSource = begin
|
||||
@captures = beginCaptures ? captures
|
||||
endPattern = new Pattern(@grammar, { match: end, captures: endCaptures ? captures, popRule: true})
|
||||
@pushRule = new Rule(@grammar, { @scopeName, patterns, endPattern })
|
||||
|
||||
Reference in New Issue
Block a user