# MP-SPDZ Compiler/allocator.py
import itertools, time
from collections import defaultdict, deque
from Compiler.exceptions import *
from Compiler.config import *
from Compiler.instructions import *
from Compiler.instructions_base import *
from Compiler.util import *
import Compiler.graph
import Compiler.program
import heapq, itertools
import operator
import sys
class StraightlineAllocator:
    """Allocate variables in a straightline program using n registers.
    It is based on the precondition that every register is only defined once."""

    def __init__(self, n):
        # maps vector-base register -> allocated machine register index
        self.alloc = {}
        # per-register-type high-water mark of allocated register slots
        self.usage = Compiler.program.RegType.create_dict(lambda: 0)
        # maps register -> instruction that defined it (reverse-order walk)
        self.defined = {}
        # registers already seen by dealloc_reg
        self.dealloc = set()
        # maximum number of registers per type
        self.n = n

    def alloc_reg(self, reg, free):
        """Assign a machine register slot to *reg* (via its vector base),
        reusing a freed slot of the same type and size when one exists.

        Raises RegisterOverflowError when more than *n* registers of one
        type would be needed.
        """
        base = reg.vectorbase
        if base in self.alloc:
            # already allocated
            return
        reg_type = reg.reg_type
        size = base.size
        if free[reg_type, size]:
            res = free[reg_type, size].pop()
        else:
            if self.usage[reg_type] < self.n:
                res = self.usage[reg_type]
                self.usage[reg_type] += size
            else:
                raise RegisterOverflowError()
        self.alloc[base] = res
        if base.vector:
            # vector elements occupy consecutive slots after the base
            for i, r in enumerate(base.vector):
                r.i = self.alloc[base] + i
        base.i = self.alloc[base]

    def dealloc_reg(self, reg, inst, free):
        """Release *reg* after its defining instruction *inst*.

        A vector's slot is only returned to *free* once all of its
        elements have been seen for deallocation.
        """
        self.dealloc.add(reg)
        base = reg.vectorbase
        if base.vector and not inst.is_vec():
            for i in base.vector:
                if i not in self.dealloc:
                    # not all vector elements ready for deallocation
                    return
        free[reg.reg_type, base.size].add(self.alloc[base])
        if inst.is_vec() and base.vector:
            self.defined[base] = inst
            for i in base.vector:
                self.defined[i] = inst
        else:
            self.defined[reg] = inst

    def process(self, program, alloc_pool):
        """Allocate registers for *program*, walking it in reverse so a
        register is freed at its definition (its last use in reverse order).

        Warns about instructions all of whose outputs are never read and
        raises CompilerError on a double write. Returns the usage dict.
        """
        for k, i in enumerate(reversed(program)):
            unused_regs = []
            for j in i.get_def():
                if j.vectorbase in self.alloc:
                    if j in self.defined:
                        raise CompilerError("Double write on register %s " \
                            "assigned by '%s' in %s" % \
                            (j, i, format_trace(i.caller)))
                else:
                    # unused register
                    self.alloc_reg(j, alloc_pool)
                    unused_regs.append(j)
            if unused_regs and len(unused_regs) == len(list(i.get_def())):
                # only report if all assigned registers are unused
                print("Register(s) %s never used, assigned by '%s' in %s" % \
                    (unused_regs, i, format_trace(i.caller)))
            for j in i.get_used():
                self.alloc_reg(j, alloc_pool)
            for j in i.get_def():
                self.dealloc_reg(j, i, alloc_pool)
            if k % 1000000 == 0 and k > 0:
                print("Allocated registers for %d instructions at %s" % \
                    (k, time.asctime()))
        return self.usage
def determine_scope(block, options):
    """Check def-before-use discipline within *block*.

    Registers read before any local write are looked up in the chain of
    enclosing scopes; hits are recorded in block.used_from_scope and marked
    non-eliminable. Misses and double writes print a warning (and exit if
    options.stop is set). Sets block.defined_registers to all registers
    written in this block.
    """
    last_def = defaultdict(lambda: -1)
    used_from_scope = set()

    def find_in_scope(reg, scope):
        # walk outwards through enclosing scopes looking for a definition
        while True:
            if scope is None:
                return False
            elif reg in scope.defined_registers:
                return True
            scope = scope.scope

    def read(reg, n):
        if last_def[reg] == -1:
            if find_in_scope(reg, block.scope):
                used_from_scope.add(reg)
                reg.can_eliminate = False
            else:
                print('Warning: read before write at register %s' % reg)
                print('\tline %d: %s' % (n, instr))
                print('\tinstruction trace: %s' % format_trace(instr.caller, '\t\t'))
                print('\tregister trace: %s' % format_trace(reg.caller, '\t\t'))
                if options.stop:
                    sys.exit(1)

    def write(reg, n):
        if last_def[reg] != -1:
            print('Warning: double write at register %s' % reg)
            print('\tline %d: %s' % (n, instr))
            print('\ttrace: %s' % format_trace(instr.caller, '\t\t'))
            if options.stop:
                sys.exit(1)
        last_def[reg] = n

    for n, instr in enumerate(block.instructions):
        outputs, inputs = instr.get_def(), instr.get_used()
        for reg in inputs:
            if reg.vector and instr.is_vec():
                for i in reg.vector:
                    read(i, n)
            else:
                read(reg, n)
        for reg in outputs:
            if reg.vector and instr.is_vec():
                for i in reg.vector:
                    write(i, n)
            else:
                write(reg, n)
    block.used_from_scope = used_from_scope
    block.defined_registers = set(last_def)
class Merger:
def __init__(self, block, options, merge_classes):
self.block = block
self.instructions = block.instructions
self.options = options
if options.max_parallel_open:
self.max_parallel_open = int(options.max_parallel_open)
else:
self.max_parallel_open = float('inf')
self.counter = defaultdict(lambda: 0)
self.dependency_graph(merge_classes)
def do_merge(self, merges_iter):
""" Merge an iterable of nodes in G, returning the number of merged
instructions and the index of the merged instruction. """
# sort merges, necessary for inputb
merge = list(merges_iter)
merge.sort()
merges_iter = iter(merge)
instructions = self.instructions
mergecount = 0
try:
n = next(merges_iter)
except StopIteration:
return mergecount, None
def expand_vector_args(inst):
if inst.is_vec():
for arg in inst.args:
arg.create_vector_elements()
res = sum(zip(*inst.args), ())
return list(res)
else:
return inst.args
for i in merges_iter:
if isinstance(instructions[n], startinput_class):
instructions[n].args[1] += instructions[i].args[1]
elif isinstance(instructions[n], (stopinput, gstopinput)):
if instructions[n].get_size() != instructions[i].get_size():
raise NotImplemented()
else:
instructions[n].args += instructions[i].args[1:]
else:
if instructions[n].get_size() != instructions[i].get_size():
# merge as non-vector instruction
instructions[n].args = expand_vector_args(instructions[n]) + \
expand_vector_args(instructions[i])
if instructions[n].is_vec():
instructions[n].size = 1
else:
instructions[n].args += instructions[i].args
# join arg_formats if not special iterators
# if not isinstance(instructions[n].arg_format, (itertools.repeat, itertools.cycle)) and \
# not isinstance(instructions[i].arg_format, (itertools.repeat, itertools.cycle)):
# instructions[n].arg_format += instructions[i].arg_format
instructions[i] = None
self.merge_nodes(n, i)
mergecount += 1
return mergecount, n
def compute_max_depths(self, depth_of):
""" Compute the maximum 'depth' at which every instruction can be placed.
This is the minimum depth of any merge_node succeeding an instruction.
Similar to DAG shortest paths algorithm. Traverses the graph in reverse
topological order, updating the max depth of each node's predecessors.
"""
G = self.G
merge_nodes_set = self.open_nodes
top_order = Compiler.graph.topological_sort(G)
max_depth_of = [None] * len(G)
max_depth = max(depth_of)
for i in range(len(max_depth_of)):
if i in merge_nodes_set:
max_depth_of[i] = depth_of[i] - 1
else:
max_depth_of[i] = max_depth
for u in reversed(top_order):
for v in G.pred[u]:
if v not in merge_nodes_set:
max_depth_of[v] = min(max_depth_of[u], max_depth_of[v])
return max_depth_of
def merge_inputs(self):
merges = defaultdict(list)
remaining_input_nodes = []
def do_merge(nodes):
if len(nodes) > 1000:
print 'Merging %d inputs...' % len(nodes)
self.do_merge(iter(nodes))
for n in self.input_nodes:
inst = self.instructions[n]
merge = merges[inst.args[0],inst.__class__]
if len(merge) == 0:
remaining_input_nodes.append(n)
merge.append(n)
if len(merge) >= self.max_parallel_open:
do_merge(merge)
merge[:] = []
for merge in reversed(sorted(merges.itervalues())):
if merge:
do_merge(merge)
self.input_nodes = remaining_input_nodes
def compute_preorder(self, merges, rev_depth_of):
# find flexible nodes that can be on several levels
# and find sources on level 0
G = self.G
merge_nodes_set = self.open_nodes
depth_of = self.depths
instructions = self.instructions
flex_nodes = defaultdict(dict)
starters = []
for n in xrange(len(G)):
if n not in merge_nodes_set and \
depth_of[n] != rev_depth_of[n] and G[n] and G.get_attr(n,'start') == -1 and not isinstance(instructions[n], AsymmetricCommunicationInstruction):
#print n, depth_of[n], rev_depth_of[n]
flex_nodes[depth_of[n]].setdefault(rev_depth_of[n], set()).add(n)
elif len(G.pred[n]) == 0 and \
not isinstance(self.instructions[n], RawInputInstruction):
starters.append(n)
if n % 10000000 == 0 and n > 0:
print "Processed %d nodes at" % n, time.asctime()
inputs = defaultdict(list)
for node in self.input_nodes:
player = self.instructions[node].args[0]
inputs[player].append(node)
first_inputs = [l[0] for l in inputs.itervalues()]
other_inputs = []
i = 0
while True:
i += 1
found = False
for l in inputs.itervalues():
if i < len(l):
other_inputs.append(l[i])
found = True
if not found:
break
other_inputs.reverse()
preorder = []
# magical preorder for topological search
max_depth = max(merges)
if max_depth > 10000:
print "Computing pre-ordering ..."
for i in xrange(max_depth, 0, -1):
preorder.append(G.get_attr(merges[i], 'stop'))
for j in flex_nodes[i-1].itervalues():
preorder.extend(j)
preorder.extend(flex_nodes[0].get(i, []))
preorder.append(merges[i])
if i % 100000 == 0 and i > 0:
print "Done level %d at" % i, time.asctime()
preorder.extend(other_inputs)
preorder.extend(starters)
preorder.extend(first_inputs)
if max_depth > 10000:
print "Done at", time.asctime()
return preorder
def longest_paths_merge(self):
""" Attempt to merge instructions of type instruction_type (which are given in
merge_nodes) using longest paths algorithm.
Returns the no. of rounds of communication required after merging (assuming 1 round/instruction).
Doesn't use networkx.
"""
G = self.G
instructions = self.instructions
merge_nodes = self.open_nodes
depths = self.depths
if not merge_nodes and not self.input_nodes:
return 0
# merge opens at same depth
merges = defaultdict(list)
for node in merge_nodes:
merges[depths[node]].append(node)
# after merging, the first element in merges[i] remains for each depth i,
# all others are removed from instructions and G
last_nodes = [None, None]
for i in sorted(merges):
merge = merges[i]
t = type(self.instructions[merge[0]])
self.counter[t] += len(merge)
if len(merge) > 1000:
print 'Merging %d %s in round %d/%d' % \
(len(merge), t.__name__, i, len(merges))
self.do_merge(merge)
self.merge_inputs()
preorder = None
if len(instructions) > 100000:
print "Topological sort ..."
order = Compiler.graph.topological_sort(G, preorder)
instructions[:] = [instructions[i] for i in order if instructions[i] is not None]
if len(instructions) > 100000:
print "Done at", time.asctime()
return len(merges)
def dependency_graph(self, merge_classes):
""" Create the program dependency graph. """
block = self.block
options = self.options
open_nodes = set()
self.open_nodes = open_nodes
self.input_nodes = []
colordict = defaultdict(lambda: 'gray', asm_open='red',\
ldi='lightblue', ldm='lightblue', stm='blue',\
mov='yellow', mulm='orange', mulc='orange',\
triple='green', square='green', bit='green',\
asm_input='lightgreen')
G = Compiler.graph.SparseDiGraph(len(block.instructions))
self.G = G
reg_nodes = {}
last_def = defaultdict(lambda: -1)
last_mem_write = []
last_mem_read = []
warned_about_mem = []
last_mem_write_of = defaultdict(list)
last_mem_read_of = defaultdict(list)
last_print_str = None
last = defaultdict(lambda: defaultdict(lambda: None))
last_open = deque()
last_text_input = [None, None]
depths = [0] * len(block.instructions)
self.depths = depths
parallel_open = defaultdict(lambda: 0)
next_available_depth = {}
self.sources = []
self.real_depths = [0] * len(block.instructions)
round_type = {}
def add_edge(i, j):
G.add_edge(i, j)
for d in (self.depths, self.real_depths):
if d[j] < d[i]:
d[j] = d[i]
def read(reg, n):
if last_def[reg] != -1:
add_edge(last_def[reg], n)
def write(reg, n):
last_def[reg] = n
def handle_mem_access(addr, reg_type, last_access_this_kind,
last_access_other_kind):
this = last_access_this_kind[addr,reg_type]
other = last_access_other_kind[addr,reg_type]
if this and other:
if this[-1] < other[0]:
del this[:]
this.append(n)
for inst in other:
add_edge(inst, n)
def mem_access(n, instr, last_access_this_kind, last_access_other_kind):
addr = instr.args[1]
reg_type = instr.args[0].reg_type
if isinstance(addr, int):
for i in range(min(instr.get_size(), 100)):
addr_i = addr + i
handle_mem_access(addr_i, reg_type, last_access_this_kind,
last_access_other_kind)
if not warned_about_mem and (instr.get_size() > 100):
print 'WARNING: Order of memory instructions ' \
'not preserved due to long vector, errors possible'
warned_about_mem.append(True)
else:
handle_mem_access(addr, reg_type, last_access_this_kind,
last_access_other_kind)
if not warned_about_mem and not isinstance(instr, DirectMemoryInstruction):
print 'WARNING: Order of memory instructions ' \
'not preserved, errors possible'
# hack
warned_about_mem.append(True)
def keep_order(instr, n, t, arg_index=None):
if arg_index is None:
player = None
else:
player = instr.args[arg_index]
if last[t][player] is not None:
add_edge(last[t][player], n)
last[t][player] = n
for n,instr in enumerate(block.instructions):
outputs,inputs = instr.get_def(), instr.get_used()
G.add_node(n)
# if options.debug:
# col = colordict[instr.__class__.__name__]
# G.add_node(n, color=col, label=str(instr))
for reg in inputs:
if reg.vector and instr.is_vec():
for i in reg.vector:
read(i, n)
else:
read(reg, n)
for reg in outputs:
if reg.vector and instr.is_vec():
for i in reg.vector:
write(i, n)
else:
write(reg, n)
# will be merged
if isinstance(instr, TextInputInstruction):
if last_text_input[0] is not None:
if instr.merge_id() != \
block.instructions[last_text_input[0]].merge_id():
add_edge(last_text_input[0], n)
last_text_input[1] = last_text_input[0]
elif last_text_input[1] is not None:
add_edge(last_text_input[1], n)
last_text_input[0] = n
if isinstance(instr, merge_classes):
open_nodes.add(n)
G.add_node(n, merges=[])
# the following must happen after adding the edge
self.real_depths[n] += 1
depth = depths[n] + 1
# find first depth that has the right type and isn't full
skipped_depths = set()
while (depth in round_type and \
round_type[depth] != instr.merge_id()) or \
(int(options.max_parallel_open) > 0 and \
parallel_open[depth] >= int(options.max_parallel_open)):
skipped_depths.add(depth)
depth = next_available_depth.get((type(instr), depth), \
depth + 1)
for d in skipped_depths:
next_available_depth[type(instr), d] = depth
round_type[depth] = instr.merge_id()
parallel_open[depth] += len(instr.args) * instr.get_size()
depths[n] = depth
if isinstance(instr, ReadMemoryInstruction):
if options.preserve_mem_order:
if last_mem_write and last_mem_read and last_mem_write[-1] > last_mem_read[-1]:
last_mem_read[:] = []
last_mem_read.append(n)
for i in last_mem_write:
add_edge(i, n)
else:
mem_access(n, instr, last_mem_read_of, last_mem_write_of)
elif isinstance(instr, WriteMemoryInstruction):
if options.preserve_mem_order:
if last_mem_write and last_mem_read and last_mem_write[-1] < last_mem_read[-1]:
last_mem_write[:] = []
last_mem_write.append(n)
for i in last_mem_read:
add_edge(i, n)
else:
mem_access(n, instr, last_mem_write_of, last_mem_read_of)
# keep I/O instructions in order
elif isinstance(instr, IOInstruction):
if last_print_str is not None:
add_edge(last_print_str, n)
last_print_str = n
elif isinstance(instr, PublicFileIOInstruction):
keep_order(instr, n, instr.__class__)
elif isinstance(instr, RawInputInstruction):
keep_order(instr, n, instr.__class__, 0)
self.input_nodes.append(n)
G.add_node(n, merges=[])
player = instr.args[0]
if isinstance(instr, stopinput):
add_edge(last[startinput_class][player], n)
elif isinstance(instr, gstopinput):
add_edge(last[gstartinput][player], n)
elif isinstance(instr, startprivateoutput_class):
keep_order(instr, n, startprivateoutput_class, 2)
elif isinstance(instr, stopprivateoutput_class):
keep_order(instr, n, stopprivateoutput_class, 1)
elif isinstance(instr, prep_class):
keep_order(instr, n, instr.args[0])
elif isinstance(instr, StackInstruction):
keep_order(instr, n, StackInstruction)
if not G.pred[n]:
self.sources.append(n)
if n % 100000 == 0 and n > 0:
print "Processed dependency of %d/%d instructions at" % \
(n, len(block.instructions)), time.asctime()
if len(open_nodes) > 1000:
print "Program has %d %s instructions" % (len(open_nodes), merge_classes)
def merge_nodes(self, i, j):
""" Merge node j into i, removing node j """
G = self.G
if j in G[i]:
G.remove_edge(i, j)
if i in G[j]:
G.remove_edge(j, i)
G.add_edges_from(zip(itertools.cycle([i]), G[j], [G.weights[(j,k)] for k in G[j]]))
G.add_edges_from(zip(G.pred[j], itertools.cycle([i]), [G.weights[(k,j)] for k in G.pred[j]]))
G.get_attr(i, 'merges').append(j)
G.remove_node(j)
def eliminate_dead_code(self):
instructions = self.instructions
G = self.G
merge_nodes = self.open_nodes
count = 0
open_count = 0
stats = defaultdict(lambda: 0)
for i,inst in zip(xrange(len(instructions) - 1, -1, -1), reversed(instructions)):
# remove if instruction has result that isn't used
unused_result = not G.degree(i) and len(list(inst.get_def())) \
and reduce(operator.and_, (reg.can_eliminate for reg in inst.get_def())) \
and not isinstance(inst, (DoNotEliminateInstruction))
stop_node = G.get_attr(i, 'stop')
unused_startopen = stop_node != -1 and instructions[stop_node] is None
def eliminate(i):
G.remove_node(i)
merge_nodes.discard(i)
stats[type(instructions[i]).__name__] += 1
instructions[i] = None
if unused_result or unused_startopen:
eliminate(i)
count += 1
if unused_startopen:
open_count += len(inst.args)
# remove unnecessary stack instructions
# left by optimization with budget
if isinstance(inst, popint_class) and \
(not G.degree(i) or (G.degree(i) == 1 and
isinstance(instructions[list(G[i])[0]], StackInstruction))) \
and \
inst.args[0].can_eliminate and \
len(G.pred[i]) == 1 and \
isinstance(instructions[list(G.pred[i])[0]], pushint_class):
eliminate(list(G.pred[i])[0])
eliminate(i)
count += 2
if count > 0:
print 'Eliminated %d dead instructions, among which %d opens: %s' \
% (count, open_count, dict(stats))
def print_graph(self, filename):
f = open(filename, 'w')
print >>f, 'digraph G {'
for i in range(self.G.n):
for j in self.G[i]:
print >>f, '"%d: %s" -> "%d: %s";' % \
(i, self.instructions[i], j, self.instructions[j])
print >>f, '}'
f.close()
def print_depth(self, filename):
f = open(filename, 'w')
for i in range(self.G.n):
print >>f, '%d: %s' % (self.depths[i], self.instructions[i])
f.close()