Tag rules in grammar with whether or not they have been seen

This is instead of keeping a std::set with rule identifiers. Keeping the information in the grammar is a lot faster (about 25%) as we can update the status in O(1) without any memory allocation.

The downside is that the grammar is now being mutated by the parser. This is currently safe because only a single thread is used for parsing. When we switch to allowing multiple threads to perform parsing, we should make a copy of the grammar for each instance.

Another downside is that we only tag rules that have begin/match patterns. Rules that are wrappers for a set of rules, or rules that include another rule, are therefore never rejected, even if already visited, although the target rules they resolve to will be. As a consequence, if an include (indirectly) includes itself, we will no longer break such a cycle (though this is clearly a bug in the grammar if it happens, and we could preprocess the grammar to catch it).
This commit is contained in:
Allan Odgaard
2013-08-17 22:09:03 +02:00
parent ee43777c3a
commit 2bd7b877e6
2 changed files with 27 additions and 26 deletions

View File

@@ -229,25 +229,19 @@ namespace parse
scope = res[stack.back().first] = stack.back().second;
}
static void collect_children (rule_ptr const& base, std::vector<rule_ptr> const& children, std::set<size_t>& unique, std::vector<rule_ptr>& res);
static void collect_children (rule_ptr const& base, std::vector<rule_ptr> const& children, std::vector<rule_ptr>& res);
static rule_ptr resolve_include (rule_ptr const& base, rule_ptr rule, std::set<size_t>& unique)
static rule_ptr resolve_include (rule_ptr const& base, rule_ptr rule)
{
while(rule && rule->include_string != NULL_STR)
{
std::string const& name = rule->include_string;
if(rule = rule->include.lock())
{
if(unique.find(rule->rule_id) != unique.end())
break;
unique.insert(rule->rule_id);
}
else if(name == "$base")
{
rule = base;
if(unique.find(rule->rule_id) != unique.end())
break;
unique.insert(rule->rule_id);
}
else
{
@@ -257,30 +251,30 @@ namespace parse
return rule;
}
static void collect_rule (rule_ptr const& base, rule_ptr rule, std::set<size_t>& unique, std::vector<rule_ptr>& res)
static void collect_rule (rule_ptr const& base, rule_ptr rule, std::vector<rule_ptr>& res)
{
if(unique.find(rule->rule_id) != unique.end())
return;
unique.insert(rule->rule_id);
rule = resolve_include(base, rule, unique);
if(!rule)
rule = resolve_include(base, rule);
if(!rule || rule->included)
return;
if(rule->match_pattern)
{
rule->included = true;
res.push_back(rule);
}
else if(!rule->children.empty())
collect_children(base, rule->children, unique, res);
{
collect_children(base, rule->children, res);
}
}
static void collect_children (rule_ptr const& base, std::vector<rule_ptr> const& children, std::set<size_t>& unique, std::vector<rule_ptr>& res)
static void collect_children (rule_ptr const& base, std::vector<rule_ptr> const& children, std::vector<rule_ptr>& res)
{
for(rule_ptr const& rule : children)
collect_rule(base, rule, unique, res);
collect_rule(base, rule, res);
}
static void collect_injections (rule_ptr const& base, stack_ptr const& stack, std::set<size_t>& unique, scope::context_t const& scope, std::vector<rule_ptr>& res)
static void collect_injections (rule_ptr const& base, stack_ptr const& stack, scope::context_t const& scope, std::vector<rule_ptr>& res)
{
for(stack_ptr node = stack; node; node = node->parent)
{
@@ -290,14 +284,14 @@ namespace parse
for(auto const& pair : *node->rule->injections)
{
if(scope::selector_t(pair.first).does_match(scope))
collect_rule(base, pair.second, unique, res);
collect_rule(base, pair.second, res);
}
}
for(auto const& pair : injected_grammars())
{
if(pair.first.does_match(scope))
collect_children(base, pair.second->children, unique, res);
collect_children(base, pair.second->children, res);
}
}
@@ -312,11 +306,10 @@ namespace parse
res.insert((ranked_match_t){ stack->rule, match, stack->apply_end_last ? SIZE_T_MAX : 0 });
}
std::set<size_t> unique;
std::vector<rule_ptr> rules;
collect_injections(base, stack, unique, scope::context_t(stack->scope, ""), rules);
collect_children(base, stack->rule->children, unique, rules);
collect_injections(base, stack, unique, scope::context_t("", stack->scope), rules);
collect_injections(base, stack, scope::context_t(stack->scope, ""), rules);
collect_children(base, stack->rule->children, rules);
collect_injections(base, stack, scope::context_t("", stack->scope), rules);
// ============================
// = Match rules against text =
@@ -325,6 +318,8 @@ namespace parse
std::vector<std::pair<rule_ptr, size_t>> v;
for(size_t j = 0; j < rules.size(); ++j)
{
rules[j]->included = false;
std::map<size_t, regexp::match_t>::iterator it = match_cache.find(rules[j]->rule_id);
if(it != match_cache.end())
{

View File

@@ -55,6 +55,12 @@ namespace parse
regexp::pattern_t match_pattern;
regexp::pattern_t while_pattern;
regexp::pattern_t end_pattern;
// =================
// = Mutable State =
// =================
bool included = false;
};
struct stack_t