ARM support.

This commit is contained in:
Marcel Keller
2021-04-19 21:26:54 +10:00
parent 6c89808733
commit 0f656fa7b7
112 changed files with 1736 additions and 475 deletions

3
.gitmodules vendored
View File

@@ -7,3 +7,6 @@
[submodule "Programs/Circuits"]
path = Programs/Circuits
url = https://github.com/mkskeller/bristol-fashion
[submodule "simde"]
path = simde
url = https://github.com/simd-everywhere/simde

View File

@@ -7,11 +7,10 @@
#define COMMON_INC_KEY_H_
#include <iostream>
#include <emmintrin.h>
#include <smmintrin.h>
#include <string.h>
#include "Tools/FlexBuffer.h"
#include "Tools/intrinsics.h"
#include "Math/gf2nlong.h"
using namespace std;

View File

@@ -371,7 +371,7 @@ void FakeProgramParty::receive_spdz_wires(ReceivedMsg& msg)
spdz_mac_key.unpack(spdz_wires[op].back());
if (!MC)
{
MC = new Passing_MAC_Check<Share<gf2n_long>>(spdz_mac_key);
MC = new MAC_Check_<Share<gf2n_long>>(spdz_mac_key);
cout << "MAC key: " << hex << spdz_mac_key << endl;
mac_key = spdz_mac_key;
}

View File

@@ -1,5 +1,14 @@
The changelog explains changes pulled through from the private development repository. Bug fixes and small enhancements are committed between releases and not documented here.
## 0.2.4 (Apr 19, 2021)
- ARM support
- Base OTs optionally without SimpleOT/AVX
- Use OpenSSL instead of Crypto++ for elliptic curves
- Post-sacrifice binary computation with replicated secret sharing similar
to [Araki et al.](https://www.ieee-security.org/TC/SP2017/papers/96.pdf)
- More flexible multithreading
## 0.2.3 (Feb 23, 2021)
- Distributed key generation for homomorphic encryption with active security similar to [Rotaru et al.](https://eprint.iacr.org/2019/1300)

32
CONFIG
View File

@@ -3,7 +3,6 @@ ROOT = .
OPTIM= -O3
#PROF = -pg
#DEBUG = -DDEBUG
#MEMPROTECT = -DMEMPROTECT
GDEBUG = -g
# set this to your preferred local storage directory
@@ -12,8 +11,8 @@ PREP_DIR = '-DPREP_DIR="Player-Data/"'
# set for SHE preprocessing (SPDZ and Overdrive)
USE_NTL = 0
# set for using GF(2^128) online phase, OT, MASCOT, or BMR
# unset for GF(2^40) online and offline phase
# set for using GF(2^128)
# unset for GF(2^40)
USE_GF2N_LONG = 1
# set to -march=<architecture> for optimization
@@ -28,6 +27,24 @@ USE_GF2N_LONG = 1
ARCH = -mtune=native -msse4.1 -msse4.2 -maes -mpclmul -mavx -mavx2 -mbmi2 -madx
ARCH = -march=native
MACHINE := $(shell uname -m)
OS := $(shell uname -s)
ifeq ($(MACHINE), x86_64)
# set this to 0 to avoid using AVX for OT
ifeq ($(OS), Linux)
CHECK_AVX := $(shell grep -q avx /proc/cpuinfo; echo $$?)
ifeq ($(CHECK_AVX), 0)
AVX_OT = 1
else
AVX_OT = 0
endif
else
AVX_OT = 1
endif
else
AVX_OT = 0
endif
# allow to set compiler in CONFIG.mine
CXX = g++
@@ -38,6 +55,10 @@ ifeq ($(USE_GF2N_LONG),1)
GF2N_LONG = -DUSE_GF2N_LONG
endif
ifeq ($(AVX_OT), 0)
CFLAGS += -DNO_AVX_OT
endif
# MAX_MOD_SZ (for FHE) must be at least and GFP_MOD_SZ (for computation)
# must be exactly ceil(len(p)/len(word)) for the relevant prime p
# GFP_MOD_SZ only needs to be set for primes of bit length more than 256.
@@ -51,7 +72,6 @@ ifeq ($(USE_NTL),1)
LDLIBS := -lntl $(LDLIBS)
endif
OS := $(shell uname -s)
ifeq ($(OS), Linux)
LDLIBS += -lrt
endif
@@ -62,12 +82,10 @@ else
BOOST = -lboost_thread $(MY_BOOST)
endif
CFLAGS += $(ARCH) $(MY_CFLAGS) $(GDEBUG) -Wextra -Wall $(OPTIM) -I$(ROOT) -pthread $(PROF) $(DEBUG) $(MOD) $(MEMPROTECT) $(GF2N_LONG) $(PREP_DIR) $(SECURE) -std=c++11 -Werror
CFLAGS += $(ARCH) $(MY_CFLAGS) $(GDEBUG) -Wextra -Wall $(OPTIM) -I$(ROOT) -pthread $(PROF) $(DEBUG) $(MOD) $(GF2N_LONG) $(PREP_DIR) $(SECURE) -std=c++11 -Werror
CPPFLAGS = $(CFLAGS)
LD = $(CXX)
ECLIB = -lcryptopp
ifeq ($(OS), Darwin)
ifeq ($(USE_NTL),1)
CFLAGS += -Wno-error=unused-parameter

View File

@@ -284,7 +284,7 @@ class sbits(bits):
Instances can also be initialized from :py:obj:`~Compiler.types.regint`
and :py:obj:`~Compiler.types.sint`.
"""
max_length = 128
max_length = 64
reg_type = 'sb'
is_clear = False
clear_type = cbits

View File

@@ -190,6 +190,8 @@ def TruncLeakyInRing(a, k, m, signed):
Returns a >> m.
Requires a < 2^k and leaks a % 2^m (needs to be constant or random).
"""
if k == m:
return 0
assert k > m
assert int(program.options.ring) >= k
from .types import sint, intbitint, cint, cgf2n

View File

@@ -103,7 +103,7 @@ class HeapQ(object):
childpos = MemValue(start * shift)
@for_range(self.levels - 1)
def f(i):
parentpos = childpos.right_shift(1, self.levels)
parentpos = childpos.right_shift(1, self.levels + 1)
parent, parent_state = self.heap.read_and_maybe_remove(parentpos)
child, child_state = self.heap.read_and_maybe_remove(childpos)
swap = parent > child

View File

@@ -1,3 +1,4 @@
import math
from math import log, floor, ceil
from Compiler.instructions import *
from . import types
@@ -411,6 +412,8 @@ def TruncInRing(to_shift, l, pow2m):
return types.sint.bit_compose(reversed(bits))
def SplitInRing(a, l, m):
if l == 1:
return m.if_else(a, 0), m.if_else(0, a), 1
pow2m = Pow2(m, l, None)
upper = TruncInRing(a, l, pow2m)
lower = a - upper * pow2m
@@ -620,27 +623,36 @@ def BITLT(a, b, bit_length):
def BitDecFull(a):
from .library import get_program, do_while, if_, break_point
from .types import sint, regint, longint
p=int(get_program().options.prime)
p = get_program().prime
assert p
bit_length = p.bit_length()
bbits = [sint(size=a.size) for i in range(bit_length)]
tbits = [[sint(size=1) for i in range(bit_length)] for j in range(a.size)]
pbits = util.bit_decompose(p)
# Loop until we get some random integers less than p
done = [regint(0) for i in range(a.size)]
@do_while
def get_bits_loop():
logp = int(round(math.log(p, 2)))
if abs(p - 2 ** logp) / p < 2 ** -get_program().security:
# inspired by Rabbit (https://eprint.iacr.org/2021/119)
# no need for exact randomness generation
# if modulo a power of two is close enough
bbits = [sint.get_random_bit(size=a.size) for i in range(logp)]
if logp != bit_length:
bbits += [sint(0, size=a.size)]
else:
bbits = [sint(size=a.size) for i in range(bit_length)]
tbits = [[sint(size=1) for i in range(bit_length)] for j in range(a.size)]
pbits = util.bit_decompose(p)
# Loop until we get some random integers less than p
done = [regint(0) for i in range(a.size)]
@do_while
def get_bits_loop():
for j in range(a.size):
@if_(done[j] == 0)
def _():
for i in range(bit_length):
tbits[j][i].link(sint.get_random_bit())
c = regint(BITLT(tbits[j], pbits, bit_length).reveal())
done[j].link(c)
return (sum(done) != a.size)
for j in range(a.size):
@if_(done[j] == 0)
def _():
for i in range(bit_length):
tbits[j][i].link(sint.get_random_bit())
c = regint(BITLT(tbits[j], pbits, bit_length).reveal())
done[j].link(c)
return (sum(done) != a.size)
for j in range(a.size):
for i in range(bit_length):
movs(bbits[i][j], tbits[j][i])
for i in range(bit_length):
movs(bbits[i][j], tbits[j][i])
b = sint.bit_compose(bbits)
c = (a-b).reveal()
t = (p-c).bit_decompose(bit_length)

View File

@@ -1577,19 +1577,6 @@ class writesocketc(base.IOInstruction):
def has_var_args(self):
return True
@base.vectorize
class writesockets(base.IOInstruction):
"""
Write a variable number of secret shares + MACs from registers into a socket
for a specified client id, message_type
"""
__slots__ = []
code = base.opcodes['WRITESOCKETS']
arg_format = tools.chain(['ci', 'int'], itertools.repeat('s'))
def has_var_args(self):
return True
@base.vectorize
class writesocketshare(base.IOInstruction):
""" Write a variable number of shares (without MACs) from secret

View File

@@ -903,7 +903,7 @@ class DirectMemoryWriteInstruction(DirectMemoryInstruction, \
WriteMemoryInstruction):
__slots__ = []
def __init__(self, *args, **kwargs):
if program.curr_tape.prevent_direct_memory_write:
if not program.curr_tape.singular:
raise CompilerError('Direct memory writing prevented in threads')
super(DirectMemoryWriteInstruction, self).__init__(*args, **kwargs)

View File

@@ -1062,14 +1062,14 @@ def for_range_opt_multithread(n_threads, n_loops):
"""
return for_range_multithread(n_threads, None, n_loops)
def multithread(n_threads, n_items, max_size=None):
def multithread(n_threads, n_items=None, max_size=None):
"""
Distribute the computation of :py:obj:`n_items` to
:py:obj:`n_threads` threads, but leave the in-thread repetition up
to the user.
:param n_threads: compile-time (int)
:param n_items: regint/cint/int
:param n_items: regint/cint/int (default: :py:obj:`n_threads`)
The following executes ``f(0, 8)``, ``f(8, 8)``, and
``f(16, 9)`` in three different threads:
@@ -1080,6 +1080,8 @@ def multithread(n_threads, n_items, max_size=None):
def f(base, size):
...
"""
if n_items is None:
n_items = n_threads
if max_size is None:
return map_reduce(n_threads, None, n_items, initializer=lambda: [],
reducer=None, looping=False)

View File

@@ -703,6 +703,9 @@ class Dense(DenseBase):
progress('f input')
def forward(self, batch=None):
if batch is None:
batch = regint.Array(self.N)
batch.assign(regint.inc(self.N))
self.compute_f_input(batch=batch)
if self.activation_layer:
self.activation_layer.forward(batch)

View File

@@ -91,7 +91,11 @@ class intBlock(Block):
for length,start in zip(self.lengths[:-1],series(self.lengths)):
res.append(remainder.mod2m(length, total_length - start, False))
remainder -= res[-1]
remainder /= floatingpoint.two_power(length)
if Program.prog.options.ring:
remainder = remainder.trunc_zeros(length,
total_length - start, False)
else:
remainder /= floatingpoint.two_power(length)
res.append(remainder)
return res
def set_slice(self, value):
@@ -1498,12 +1502,12 @@ class PackedIndexStructure(object):
rem = mod2m(index, self.log_entries_per_block, log2(self.size), False)
c = mod2m(rem, self.log_entries_per_element, \
self.log_entries_per_block, False)
b = (rem - c).trunc_zeros(self.log_entries_per_element,
b = trunc_zeros(rem - c, self.log_entries_per_element,
self.log_entries_per_block)
if self.small:
return 0, b, c
else:
return (index - rem).trunc_zeros(self.log_entries_per_block,
return trunc_zeros(index - rem, self.log_entries_per_block,
log2(self.size)), b, c
else:
index_bits = bit_decompose(index, log2(self.size))

View File

@@ -118,7 +118,6 @@ class Program(object):
self.req_num = None
self.tape_stack = []
self.n_threads = 1
self.free_threads = set()
self.public_input_file = None
self.types = {}
self.budget = int(self.options.budget)
@@ -206,6 +205,28 @@ class Program(object):
self.progname = progname
def new_tape(self, function, args=[], name=None, single_thread=False):
"""
Create a new tape from a function. See
:py:func:`~Compiler.library.multithread` and
:py:func:`~Compiler.library.for_range_opt_multithread` for
easier-to-use higher-level functionality. The following runs
two threads defined by two different functions::
def f():
...
def g():
...
tapes = [program.new_tape(x) for x in (f, g)]
thread_numbers = program.run_tapes(tapes)
program.join_tapes(thread_numbers)
:param function: Python function defining the thread
:param args: arguments to the function
:param name: name used for files
:param single_thread: Boolean indicating whether tape will never be run in parallel to itself
:returns: tape handle
"""
if name is None:
name = function.__name__
name = "%s-%s" % (self.name, name)
@@ -214,7 +235,7 @@ class Program(object):
tape_index = len(self.tapes)
self.tape_stack.append(self.curr_tape)
self.curr_tape = Tape(name, self)
self.curr_tape.prevent_direct_memory_write = not single_thread
self.curr_tape.singular = single_thread
self.tapes.append(self.curr_tape)
function(*args)
self.finalize_tape(self.curr_tape)
@@ -226,14 +247,31 @@ class Program(object):
return self.run_tapes([[tape_index, arg]])[0]
def run_tapes(self, args):
if self.curr_tape is not self.tapes[0]:
""" Run tapes in parallel. See :py:func:`new_tape` for an example.
:param args: list of tape handles or tuples of tape handle and extra argument (for :py:func:`~Compiler.library.get_arg`)
:returns: list of thread numbers
"""
if not self.curr_tape.singular:
raise CompilerError('Compiler does not support ' \
'recursive spawning of threads')
args = [list(util.tuplify(arg)) for arg in args]
singular_tapes = set()
for arg in args:
if self.tapes[arg[0]].singular:
if arg[0] in singular_tapes:
raise CompilerError('cannot run singular tape in parallel')
singular_tapes.add(arg[0])
assert len(arg)
assert len(arg) <= 2
if len(arg) == 1:
arg += [0]
thread_numbers = []
while len(thread_numbers) < len(args):
if self.free_threads:
thread_numbers.append(min(self.free_threads))
self.free_threads.remove(thread_numbers[-1])
free_threads = self.curr_tape.free_threads
if free_threads:
thread_numbers.append(min(free_threads))
free_threads.remove(thread_numbers[-1])
else:
thread_numbers.append(self.n_threads)
self.n_threads += 1
@@ -247,10 +285,18 @@ class Program(object):
return thread_numbers
def join_tape(self, thread_number):
self.join_tapes([thread_number])
def join_tapes(self, thread_numbers):
""" Wait for completion of tapes. See :py:func:`new_tape` for an example.
:param thread_numbers: list of thread numbers
"""
self.curr_tape.start_new_basicblock(name='pre-join_tape')
Compiler.instructions.join_tape(thread_number)
for thread_number in thread_numbers:
Compiler.instructions.join_tape(thread_number)
self.curr_tape.free_threads.add(thread_number)
self.curr_tape.start_new_basicblock(name='post-join_tape')
self.free_threads.add(thread_number)
def update_req(self, tape):
if self.req_num is None:
@@ -259,6 +305,7 @@ class Program(object):
self.req_num += tape.req_num
def write_bytes(self):
""" Write all non-empty threads and schedule to files. """
nonempty_tapes = [t for t in self.tapes]
@@ -312,7 +359,7 @@ class Program(object):
""" Allocate memory from the top """
if not isinstance(size, int):
raise CompilerError('size must be known at compile time')
if (creator_tape or self.curr_tape) != self.tapes[0]:
if not (creator_tape or self.curr_tape).singular:
raise CompilerError('cannot allocate memory outside main thread')
if size == 0:
return
@@ -510,7 +557,8 @@ class Tape:
self.req_bit_length = defaultdict(lambda: 0)
self.function_basicblocks = {}
self.functions = []
self.prevent_direct_memory_write = False
self.singular = True
self.free_threads = set()
class BasicBlock(object):
def __init__(self, parent, name, scope, exit_condition=None):

View File

@@ -21,7 +21,13 @@ Basic types
-----------
Basic types contain many special methods such as :py:func:`__add__`. This is
used for operator overloading in Python. In some operations such as
used for operator overloading in Python. It is not recommended to use
them, use the plain operators instead, such as ``+`` instead of
:py:func:`__add__`. See
https://docs.python.org/3/reference/datamodel.html#special-method-names
for a translation to operators.
In some operations such as
secure comparison, the secure computation protocols allow for more
parameters than just the operands which influence the performance. In
this case, we provide an alias for better code readability. For
@@ -780,7 +786,12 @@ class cint(_clear, _int):
@vectorized_classmethod
def read_from_socket(cls, client_id, n=1):
""" Read a list of clear values from socket. """
""" Receive clear value(s) from client.
:param client_id: Client id (regint)
:param n: number of values (default 1)
:returns: cint (if n=1) or list of cint
"""
res = [cls() for i in range(n)]
readsocketc(client_id, *res)
if n == 1:
@@ -790,7 +801,11 @@ class cint(_clear, _int):
@vectorized_classmethod
def write_to_socket(self, client_id, values, message_type=ClientMessageType.NoType):
""" Send a list of clear values to socket """
""" Send a list of clear values to a client.
:param client_id: Client id (regint)
:param values: list of cint
"""
writesocketc(client_id, message_type, *values)
@vectorized_classmethod
@@ -1207,7 +1222,12 @@ class regint(_register, _int):
@vectorized_classmethod
def read_from_socket(cls, client_id, n=1):
""" Receive n register values from socket """
""" Receive clear integer value(s) from client.
:param client_id: Client id (regint)
:param n: number of values (default 1)
:returns: regint (if n=1) or list of regint
"""
res = [cls() for i in range(n)]
readsocketint(client_id, *res)
if n == 1:
@@ -1217,7 +1237,11 @@ class regint(_register, _int):
@vectorized_classmethod
def write_to_socket(self, client_id, values, message_type=ClientMessageType.NoType):
""" Send a list of integers to socket """
""" Send a list of clear integers to a client.
:param client_id: Client id (regint)
:param values: list of regint
"""
writesocketint(client_id, message_type, *values)
@vectorize_init
@@ -1805,6 +1829,14 @@ class sint(_secret, _int):
PreOR = staticmethod(floatingpoint.PreOR)
get_type = staticmethod(lambda n: sint)
@staticmethod
def require_bit_length(n_bits):
if program.options.ring:
if int(program.options.ring) < n_bits:
raise CompilerError('computation modulus too small')
else:
program.curr_tape.require_bit_length(n_bits)
@vectorized_classmethod
def get_random_int(cls, bits):
""" Secret random n-bit number according to security model.
@@ -1906,7 +1938,12 @@ class sint(_secret, _int):
@vectorized_classmethod
def read_from_socket(cls, client_id, n=1):
""" Receive n shares and MAC shares from socket """
""" Receive secret-shared value(s) from client.
:param client_id: Client id (regint)
:param n: number of values (default 1)
:returns: sint (if n=1) or list of sint
"""
res = [cls() for i in range(n)]
readsockets(client_id, *res)
if n == 1:
@@ -1914,27 +1951,46 @@ class sint(_secret, _int):
else:
return res
@vectorized_classmethod
def write_to_socket(self, client_id, values, message_type=ClientMessageType.NoType):
""" Send a list of shares and MAC shares to socket """
writesockets(client_id, message_type, *values)
@vectorize
def write_share_to_socket(self, client_id, message_type=ClientMessageType.NoType):
""" Send only share to socket """
writesocketshare(client_id, message_type, self)
@vectorized_classmethod
def write_shares_to_socket(cls, client_id, values, message_type=ClientMessageType.NoType, include_macs=False):
def write_shares_to_socket(cls, client_id, values,
message_type=ClientMessageType.NoType):
""" Send shares of a list of values to a specified client socket.
:param client_id: regint
:param values: list of sint
"""
if include_macs:
writesockets(client_id, message_type, *values)
else:
writesocketshare(client_id, message_type, *values)
writesocketshare(client_id, message_type, *values)
@classmethod
def read_from_file(cls, start, n_items):
""" Read shares from ``Persistence/Transactions-P<playerno>.data``.
:param start: starting position in number of shares from beginning (int/regint/cint)
:param n_items: number of items (int)
:returns: destination for final position, -1 for eof reached, or -2 for file not found (regint)
:returns: list of shares
"""
shares = [cls(size=1) for i in range(n_items)]
stop = regint()
readsharesfromfile(regint.conv(start), stop, *shares)
return stop, shares
@staticmethod
def write_to_file(shares):
""" Write shares to ``Persistence/Transactions-P<playerno>.data``
(appending at the end).
:param shares: list or iterable of sint
"""
for share in shares:
assert isinstance(share, sint)
assert share.size == 1
writesharestofile(*shares)
@vectorized_classmethod
def load_mem(cls, address, mem_type=None):
@@ -2920,8 +2976,14 @@ class cfix(_number, _structure):
@vectorized_classmethod
def read_from_socket(cls, client_id, n=1):
""" Read one or more cfix values from a socket.
Sender will have already bit shifted and sent as cints."""
"""
Receive clear fixed-point value(s) from client. The client needs
to convert the values to the right integer representation.
:param client_id: Client id (regint)
:param n: number of values (default 1)
:returns: cfix (if n=1) or list of cfix
"""
cint_input = cint.read_from_socket(client_id, n)
if n == 1:
return cfix._new(cint_inputs)
@@ -2930,7 +2992,12 @@ class cfix(_number, _structure):
@vectorized_classmethod
def write_to_socket(self, client_id, values, message_type=ClientMessageType.NoType):
""" Send a list of cfix values to socket. Values are sent as bit shifted cints. """
""" Send a list of clear fixed-point values to a client
(represented as clear integers).
:param client_id: Client id (regint)
:param values: list of cfix
"""
def cfix_to_cint(fix_val):
return cint(fix_val.v)
cint_values = list(map(cfix_to_cint, values))
@@ -3182,15 +3249,8 @@ class cfix(_number, _structure):
def print_plain(self):
""" Clear fixed-point output. """
if self.k > 64:
sign = 1 - (((self.v + (1 << (self.k - 1))) >> (self.k - 1)) & 1)
else:
tmp = regint()
convmodp(tmp, self.v, bitlength=self.k)
sign = cint(tmp < 0)
abs_v = sign.if_else(-self.v, self.v)
print_float_plain(cint(abs_v), cint(-self.f), \
cint(0), cint(sign), cint(0))
print_float_plain(cint.conv(self.v), cint(-self.f), \
cint(0), cint(0), cint(0))
def output_if(self, cond):
cond_print_plain(cond, self.v, cint(-self.f))
@@ -3206,8 +3266,14 @@ class _single(_number, _structure):
@classmethod
def receive_from_client(cls, n, client_id, message_type=ClientMessageType.NoType):
""" Securely obtain shares of n values input by a client.
Assumes client has already run bit shift to convert fixed point to integer."""
"""
Securely obtain shares of values input by a client. Assumes client
has already converted values to integer representation.
:param n: number of inputs (int)
:param client_id: regint
"""
sint_inputs = cls.int_type.receive_from_client(n, client_id, ClientMessageType.TripleShares)
return list(map(cls, sint_inputs))
@@ -3574,6 +3640,7 @@ class sfix(_fix):
""" Secret fixed-point input.
:param player: public (regint/cint/int) """
cls.int_type.require_bit_length(cls.k)
v = cls.int_type()
inputmixed('fix', v, cls.f, player)
return cls._new(v)
@@ -4486,7 +4553,7 @@ class Array(object):
raise CompilerError('cannot assign vector to all elements')
mem_value = MemValue(value)
self.address = MemValue.if_necessary(self.address)
n_threads = 8 if use_threads and len(self) > 2**20 else 1
n_threads = 8 if use_threads and len(self) > 2**20 else None
@library.for_range_multithread(n_threads, 1024, len(self))
def f(i):
self[i] = mem_value

View File

@@ -40,6 +40,12 @@ def mod2m(a, b, bits, signed):
else:
return a.mod2m(b, bits, signed=signed)
def trunc_zeros(a, n_zeros, bit_length=None):
if isinstance(a, int):
return a >> n_zeros
else:
return a.trunc_zeros(n_zeros, bit_length)
def right_shift(a, b, bits):
if isinstance(a, int):
return a >> b

142
ECDSA/CurveElement.cpp Normal file
View File

@@ -0,0 +1,142 @@
/*
* CurveElement.cpp
*
*/
#include <ECDSA/CurveElement.h>
#include "Math/gfp.hpp"
unsigned char CurveElement::zero[crypto_core_ristretto255_BYTES];
void CurveElement::init()
{
Scalar::init_field(
(bigint(1) << 252) + bigint("27742317777372353535851937790883648493"),
false);
if(sodium_init() == -1)
throw runtime_error("cannot initalize sodium");
unsigned char tmp[crypto_core_ristretto255_SCALARBYTES];
memset(tmp, 0, sizeof(tmp));
crypto_scalarmult_ristretto255_base(zero, tmp);
}
void CurveElement::convert(unsigned char* res, const Scalar& other)
{
bigint tmp;
tmp = other;
assert(tmp.__get_mp()->_mp_size * sizeof(mp_limb_t) <= crypto_core_ristretto255_SCALARBYTES);
memset(res, 0, crypto_core_ristretto255_SCALARBYTES);
memcpy(res, tmp.__get_mp()->_mp_d, abs(tmp.__get_mp()->_mp_size) * sizeof(mp_limb_t));
}
/**
 * Default constructor: copies the cached `zero` encoding (filled in by
 * init()) into this element and runs check().
 */
CurveElement::CurveElement()
{
    // `a` and `zero` are both crypto_core_ristretto255_BYTES long.
    memcpy(a, zero, sizeof a);
    check();
}
/**
 * Construct the element obtained by multiplying the base point with the
 * given scalar.
 *
 * @param other  scalar multiplier
 */
CurveElement::CurveElement(const Scalar& other)
{
    unsigned char scalar_bytes[crypto_core_ristretto255_SCALARBYTES];
    convert(scalar_bytes, other);

    crypto_scalarmult_ristretto255_base(a, scalar_bytes);
    check();
}
/**
 * Construct the element obtained by multiplying the base point with a
 * machine-word scalar.
 *
 * @param other  scalar multiplier; 0 yields the default-constructed
 *               (zero) element without calling libsodium
 */
CurveElement::CurveElement(word other)
{
    if (other != 0)
    {
        // Place the word's bytes at the start of a zeroed scalar buffer.
        unsigned char scalar_bytes[crypto_core_ristretto255_SCALARBYTES] = {};
        memcpy(scalar_bytes, &other, sizeof(other));

        crypto_scalarmult_ristretto255_base(a, scalar_bytes);
        check();
    }
    else
    {
        *this = CurveElement();
    }
}
/**
 * Validate the stored encoding.
 *
 * Only active when CURVE_CHECK is defined; otherwise a no-op. Throws
 * runtime_error if libsodium does not report the point as valid.
 */
void CurveElement::check()
{
#if defined(CURVE_CHECK)
    const bool valid = crypto_core_ristretto255_is_valid_point(a) == 1;
    if (not valid)
        throw runtime_error("curve point not valid");
#endif
}
/**
 * Group addition.
 *
 * @param other  right-hand operand
 * @return       new element holding the result of
 *               crypto_core_ristretto255_add on both encodings
 */
CurveElement CurveElement::operator +(const CurveElement& other) const
{
    CurveElement sum;
    crypto_core_ristretto255_add(sum.a, a, other.a);
    sum.check();
    return sum;
}
/**
 * Group subtraction.
 *
 * @param other  right-hand operand
 * @return       new element holding the result of
 *               crypto_core_ristretto255_sub on both encodings
 */
CurveElement CurveElement::operator -(const CurveElement& other) const
{
    CurveElement difference;
    crypto_core_ristretto255_sub(difference.a, a, other.a);
    difference.check();
    return difference;
}
/**
 * Scalar multiplication of this element.
 *
 * @param other  scalar multiplier
 * @return       new element holding the product
 *
 * If libsodium reports an error (negative return value), a diagnostic is
 * written to cerr and execution continues with whatever the call left in
 * the result buffer.
 */
CurveElement CurveElement::operator *(const Scalar& other) const
{
    CurveElement product;

    unsigned char scalar_bytes[crypto_core_ristretto255_SCALARBYTES];
    convert(scalar_bytes, other);

    const int status = crypto_scalarmult_ristretto255(product.a, scalar_bytes, a);
    if (status < 0)
        cerr << "EC multiplication by zero" << endl;

    product.check();
    return product;
}
/**
 * In-place group addition, implemented via operator+.
 *
 * @param other  element to add
 * @return       reference to this element
 */
CurveElement& CurveElement::operator +=(const CurveElement& other)
{
    return *this = *this + other;
}
/**
 * Equality: two elements are equal iff their stored encodings are
 * byte-wise identical.
 */
bool CurveElement::operator ==(const CurveElement& other) const
{
    return memcmp(a, other.a, sizeof(a)) == 0;
}
/** Inequality, defined as the negation of operator==. */
bool CurveElement::operator !=(const CurveElement& other) const
{
    const bool same = (*this == other);
    return !same;
}
/**
 * Append the raw encoding of this element to a stream.
 *
 * @param os  destination stream; receives sizeof(a) bytes
 */
void CurveElement::pack(octetStream& os) const
{
    const size_t n_bytes = sizeof a;
    os.append(a, n_bytes);
}
/**
 * Read the raw encoding of this element from a stream and run check().
 *
 * @param os  source stream; sizeof(a) bytes are consumed
 */
void CurveElement::unpack(octetStream& os)
{
    const size_t n_bytes = sizeof a;
    os.consume(a, n_bytes);
    check();
}
/**
 * Print a short hexadecimal prefix of the element for debugging.
 *
 * Only the first sizeof(word) bytes of the encoding are printed,
 * interpreted as one native word. The stream is left in hex mode, as
 * before.
 *
 * @param s  output stream
 * @param x  element to print
 * @return   the stream `s`
 */
ostream& operator <<(ostream& s, const CurveElement& x)
{
    // Copy into a real `word` instead of dereferencing a cast pointer:
    // the unsigned char array has no alignment guarantee for `word`
    // (a problem on architectures that trap on unaligned loads), and
    // reading it through a word* violates strict aliasing.
    static_assert(sizeof(word) <= crypto_core_ristretto255_BYTES,
            "word prefix must fit into the point encoding");
    word prefix;
    memcpy(&prefix, x.get(), sizeof(prefix));

    s << hex << prefix;
    return s;
}
/**
 * Hash the packed encoding of this element.
 *
 * @param n_bytes  requested output length; must be at least the length
 *                 of the hash produced by octetStream::hash() (asserted)
 * @return         the hash stream after resize_precise(n_bytes)
 */
octetStream CurveElement::hash(size_t n_bytes) const
{
    octetStream packed;
    pack(packed);

    auto digest = packed.hash();
    assert(digest.get_length() <= n_bytes);
    digest.resize_precise(n_bytes);
    return digest;
}

63
ECDSA/CurveElement.h Normal file
View File

@@ -0,0 +1,63 @@
/*
* CurveElement.h
*
*/
#ifndef ECDSA_CURVEELEMENT_H_
#define ECDSA_CURVEELEMENT_H_
#include <sodium.h>
#include "Math/gfp.h"
/**
 * Group element backed by libsodium's ristretto255 functions.
 *
 * The element is stored as its crypto_core_ristretto255_BYTES-long
 * encoding. init() must be called before elements are constructed
 * because the default constructor copies the static `zero` encoding
 * that init() computes.
 */
class CurveElement : public ValueInterface
{
public:
    /// Scalar type used for multiplication (field set up in init()).
    typedef gfp_<2, 4> Scalar;

private:
    /// Encoding of the zero element, filled in by init().
    static unsigned char zero[crypto_core_ristretto255_BYTES];

    /// Encoding of this element.
    unsigned char a[crypto_core_ristretto255_BYTES];

    /// Serialize a scalar into a SCALARBYTES-long buffer for libsodium.
    static void convert(unsigned char* res, const Scalar& other);

public:
    typedef void next;
    typedef void Square;

    /// Size of the encoding in bytes.
    static int size() { return sizeof(a); }
    static string type_string() { return "Curve25519"; }

    /// Global setup: scalar field, libsodium, and the `zero` encoding.
    static void init();

    /// Zero element.
    CurveElement();
    /// Base point multiplied by a scalar.
    CurveElement(const Scalar& other);
    /// Base point multiplied by a machine word (0 gives the zero element).
    CurveElement(word other);

    /// Validate the encoding (only active with CURVE_CHECK defined).
    void check();

    /// Read-only access to the raw encoding.
    const unsigned char* get() const { return a; }

    CurveElement operator+(const CurveElement& other) const;
    CurveElement operator-(const CurveElement& other) const;
    CurveElement operator*(const Scalar& other) const;

    CurveElement& operator+=(const CurveElement& other);

    bool operator==(const CurveElement& other) const;
    bool operator!=(const CurveElement& other) const;

    void assign_zero() { *this = 0; }
    // const-qualified: a pure query that only uses the const operator==,
    // so it is now also callable on const elements.
    bool is_zero() const { return *this == 0; }
    /// Add an element read from the stream to this one.
    void add(octetStream& os) { *this += os.get<CurveElement>(); }

    /// Append the raw encoding to the stream.
    void pack(octetStream& os) const;
    /// Read the raw encoding from the stream and check it.
    void unpack(octetStream& os);

    /// Hash of the packed encoding, resized to n_bytes.
    octetStream hash(size_t n_bytes) const;
};
ostream& operator<<(ostream& s, const CurveElement& x);
#endif /* ECDSA_CURVEELEMENT_H_ */

View File

@@ -7,72 +7,127 @@
#include "Math/gfp.hpp"
#include <cryptopp/oids.h>
#include <cryptopp/misc.h>
CryptoPP::DL_GroupParameters_EC<CryptoPP::ECP> P256Element::params;
CryptoPP::ECP P256Element::curve;
EC_GROUP* P256Element::curve;
void P256Element::init()
{
params = CryptoPP::DL_GroupParameters_EC<CryptoPP::ECP>(CryptoPP::ASN1::secp256k1());
curve = params.GetCurve();
auto mod = params.GetSubgroupOrder();
Scalar::init_field(CryptoPP::IntToString(mod).c_str(), false);
}
CryptoPP::Integer P256Element::convert(const Scalar& other)
{
return CryptoPP::Integer((unsigned char*) other.get_ptr(), other.size(),
CryptoPP::Integer::UNSIGNED, CryptoPP::LITTLE_ENDIAN_ORDER);
curve = EC_GROUP_new_by_curve_name(NID_secp256k1);
assert(curve != 0);
auto modulus = EC_GROUP_get0_order(curve);
Scalar::init_field(BN_bn2dec(modulus), false);
}
P256Element::P256Element()
{
point = curve.Identity();
point = EC_POINT_new(curve);
assert(point != 0);
assert(EC_POINT_set_to_infinity(curve, point) != 0);
}
P256Element::P256Element(const Scalar& other)
P256Element::P256Element(const Scalar& other) :
P256Element()
{
point = params.ExponentiateBase(convert(other));
BIGNUM* exp = BN_new();
BN_dec2bn(&exp, bigint(other).get_str().c_str());
assert(EC_POINTs_mul(curve, point, exp, 0, 0, 0, 0) != 0);
BN_free(exp);
}
P256Element::P256Element(word other)
P256Element::P256Element(word other) :
P256Element()
{
point = params.ExponentiateBase(other);
BIGNUM* exp = BN_new();
BN_dec2bn(&exp, to_string(other).c_str());
assert(EC_POINTs_mul(curve, point, exp, 0, 0, 0, 0) != 0);
BN_free(exp);
}
P256Element& P256Element::operator =(const P256Element& other)
{
assert(EC_POINT_copy(point, other.point) != 0);
return *this;
}
void P256Element::check()
{
curve.VerifyPoint(point);
assert(EC_POINT_is_on_curve(curve, point, 0) == 1);
}
P256Element::Scalar P256Element::x() const
{
return bigint(IntToString(point.x));
BIGNUM* x = BN_new();
assert(EC_POINT_get_affine_coordinates_GFp(curve, point, x, 0, 0) != 0);
char* xx = BN_bn2dec(x);
Scalar res((bigint(xx)));
OPENSSL_free(xx);
BN_free(x);
return res;
}
P256Element P256Element::operator +(const P256Element& other) const
{
P256Element res;
res.point = curve.Add(point, other.point);
assert(EC_POINT_add(curve, res.point, point, other.point, 0) != 0);
return res;
}
P256Element P256Element::operator -(const P256Element& other) const
{
P256Element res;
res.point = curve.Add(point, curve.Inverse(other.point));
return res;
P256Element tmp = other;
assert(EC_POINT_invert(curve, tmp.point, 0) != 0);
return *this + tmp;
}
P256Element P256Element::operator *(const Scalar& other) const
{
P256Element res;
res.point = curve.Multiply(convert(other), point);
BIGNUM* exp = BN_new();
BN_dec2bn(&exp, bigint(other).get_str().c_str());
assert(EC_POINT_mul(curve, res.point, 0, point, exp, 0) != 0);
BN_free(exp);
return res;
}
bool P256Element::operator ==(const P256Element& other) const
{
int cmp = EC_POINT_cmp(curve, point, other.point, 0);
assert(cmp == 0 or cmp == 1);
return not cmp;
}
void P256Element::pack(octetStream& os) const
{
octet* buffer;
size_t length = EC_POINT_point2buf(curve, point,
POINT_CONVERSION_COMPRESSED, &buffer, 0);
assert(length != 0);
os.store_int(length, 8);
os.append(buffer, length);
}
void P256Element::unpack(octetStream& os)
{
size_t length = os.get_int(8);
assert(
EC_POINT_oct2point(curve, point, os.consume(length), length, 0)
!= 0);
}
ostream& operator <<(ostream& s, const P256Element& x)
{
char* hex = EC_POINT_point2hex(x.curve, x.point,
POINT_CONVERSION_COMPRESSED, 0);
s << hex;
OPENSSL_free(hex);
return s;
}
P256Element::P256Element(const P256Element& other) :
P256Element()
{
*this = other;
}
P256Element operator*(const P256Element::Scalar& x, const P256Element& y)
{
return y * x;
@@ -90,44 +145,17 @@ P256Element& P256Element::operator /=(const Scalar& other)
return *this;
}
bool P256Element::operator ==(const P256Element& other) const
{
return point == other.point;
}
bool P256Element::operator !=(const P256Element& other) const
{
return not (*this == other);
}
void P256Element::pack(octetStream& os) const
octetStream P256Element::hash(size_t n_bytes) const
{
os.serialize(point.identity);
size_t l;
l = point.x.MinEncodedSize();
os.serialize(l);
point.x.Encode(os.append(l), l);
l = point.y.MinEncodedSize();
os.serialize(l);
point.y.Encode(os.append(l), l);
}
void P256Element::unpack(octetStream& os)
{
os.unserialize(point.identity);
size_t l;
os.unserialize(l);
point.x.Decode(os.consume(l), l);
os.unserialize(l);
point.y.Decode(os.consume(l), l);
}
ostream& operator <<(ostream& s, const P256Element& x)
{
auto& point = x.get();
if (point.identity)
s << "ID" << endl;
else
s << point.x << "," << point.y;
return s;
octetStream os;
pack(os);
auto res = os.hash();
assert(n_bytes >= res.get_length());
res.resize_precise(n_bytes);
return res;
}

View File

@@ -6,7 +6,8 @@
#ifndef ECDSA_P256ELEMENT_H_
#define ECDSA_P256ELEMENT_H_
#include <cryptopp/eccrypto.h>
#include <openssl/ec.h>
#include <openssl/obj_mac.h>
#include "Math/gfp.h"
@@ -16,12 +17,9 @@ public:
typedef gfp_<2, 4> Scalar;
private:
static CryptoPP::DL_GroupParameters_EC<CryptoPP::ECP> params;
static CryptoPP::ECP curve;
static EC_GROUP* curve;
CryptoPP::ECP::Point point;
static CryptoPP::Integer convert(const Scalar& other);
EC_POINT* point;
public:
typedef void next;
@@ -35,13 +33,13 @@ public:
static void init();
P256Element();
P256Element(const P256Element& other);
P256Element(const Scalar& other);
P256Element(word other);
void check();
P256Element& operator=(const P256Element& other);
const CryptoPP::ECP::Point& get() const { return point; }
// const unsigned char* get() const { return a; }
void check();
Scalar x() const;
@@ -55,16 +53,18 @@ public:
bool operator==(const P256Element& other) const;
bool operator!=(const P256Element& other) const;
void assign_zero() { *this = 0; }
bool is_zero() { return *this == 0; }
void assign_zero() { *this = {}; }
bool is_zero() { return *this == P256Element(); }
void add(octetStream& os) { *this += os.get<P256Element>(); }
void pack(octetStream& os) const;
void unpack(octetStream& os);
octetStream hash(size_t n_bytes) const;
friend ostream& operator<<(ostream& s, const P256Element& x);
};
P256Element operator*(const P256Element::Scalar& x, const P256Element& y);
ostream& operator<<(ostream& s, const P256Element& x);
#endif /* ECDSA_P256ELEMENT_H_ */

View File

@@ -5,9 +5,6 @@ in `preprocessing.hpp` and `sign.hpp`, respectively.
#### Compilation
- Add either `CXX = clang++` or `OPTIM = -O2` because GCC 8 or later with `-O3` will produce a segfault when using `mascot-ecdsa-party.x`
- For older hardware, also add `ARCH = -march=native`
- Install [Crypto++](https://www.cryptopp.com) (`libcrypto++-dev` on Ubuntu). We used version 5.6.4, which is the default on Ubuntu 18.04.
- Compile the binaries: `make -j8 ecdsa`
- Or compile the static binaries: `make -j8 ecdsa-static`

View File

@@ -35,5 +35,5 @@ AddableVector<T> AddableVector<T>::mul_by_X_i(int j,
}
template
AddableVector<fixint<0>> AddableVector<fixint<0>>::mul_by_X_i(int j,
const FHE_PK& pk) const;
AddableVector<Int_Random_Coins::rand_type> AddableVector<
Int_Random_Coins::rand_type>::mul_by_X_i(int j, const FHE_PK& pk) const;

View File

@@ -114,7 +114,9 @@ void FHE_PK::check_noise(const Rq_Element& x, bool check_modulo) const
noise[i] /= pr;
m = m > noise[i] ? m : noise[i];
}
#ifdef VERBOSE_KEYGEN
cerr << "max noise: " << m << endl;
#endif
}

View File

@@ -516,7 +516,9 @@ void init(P2Data& P2D,const Ring& Rg)
imatrix A;
A.resize(Rg.phi_m(), imatrix::value_type(Gord*gf2n_short::degree()));
P2D.A.resize(A[0].size(), imatrix::value_type(A.size()));
P2D.A.resize(A[0].size());
for (auto& x : P2D.A)
x.resize(A.size());
for (int slot=0; slot<Gord; slot++)
{ for (int co=0; co<gf2n_short::degree(); co++)
{ // Work out how x^co in given slot maps to plaintext vector

View File

@@ -9,7 +9,11 @@
class FHE_PK;
class Int_Random_Coins : public AddableMatrix<fixint<0>>
#ifndef N_LIMBS_RAND
#define N_LIMBS_RAND 0
#endif
class Int_Random_Coins : public AddableMatrix<fixint<N_LIMBS_RAND>>
{
typedef value_type::value_type T;

View File

@@ -109,7 +109,6 @@ DistKeyGen::DistKeyGen(const FHE_Params& params, const bigint& p) :
*/
void DistKeyGen::Gen_Random_Data(PRNG& G)
{
cout << "In Gen Random Data " << endl;
secret.from_vec(params.sampleHwt(G));
rc1.generate(G);
rc2.generate(G);
@@ -228,7 +227,9 @@ void check_randomness(vector<octetStream>& seeds,
// Re-create the randomness from these seeds
for (int i = 0; i < num_players; i++)
{ G.SetSeed(seeds[i].get_data());
#ifdef VERBOSE_KEYGEN
cout << "\tSeed for player " << i << " is..." << seeds[i] << endl;
#endif
playerKeys[i].Gen_Random_Data(G);
globalKey += playerKeys[i];
}
@@ -292,22 +293,27 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
{
const FHE_Params& params=pk.get_params();
#ifdef VERBOSE_KEYGEN
double start,stop;
/***********************
* Step 1 *
***********************/
start=clock();
#endif
// First compute and commit to the challenge value
vector<unsigned int> e(P.num_players());
vector<octetStream> Comm_e(P.num_players());
vector<octetStream> Open_e(P.num_players());
Commit_To_Challenge(e,Comm_e,Open_e,P,num_runs);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 1 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 2 *
@@ -319,11 +325,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
vector<PRNG> G(num_runs);
Commit_To_Seeds(G,seeds,Comm_seeds,Open_seeds,P,num_runs);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 2 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 2.5 *
@@ -340,28 +348,27 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
keys[i].Gen_Random_Data(G[i]);
a[i][P.my_num()] = keys[i].a;
}
cout << "Generated Random Vals" << endl;
if (commit)
{
// Do Commit and Open to Get a
Commit_And_Open(a,P,num_runs);
cout << "Finished Commit and Open" << endl;
}
else
{
Transmit_Data(a,P,num_runs);
cout << "Finished open" << endl;
}
for (int i=0; i<num_runs; i++)
keys[i].sum_a(a[i]);
a.clear();
#ifdef VERBOSE_KEYGEN
cout << "Done Step 2.5 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 3 *
@@ -373,11 +380,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
b[i][P.my_num()] = keys[i].b;
}
#ifdef VERBOSE_KEYGEN
cout << "Done Step 3 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 4 *
@@ -387,11 +396,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
else
Transmit_Data(b,P,num_runs);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 4 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 5/6 *
@@ -404,11 +415,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
}
b.clear();
#ifdef VERBOSE_KEYGEN
cout << "Done Step 5/6 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 7 *
@@ -418,11 +431,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
else
Transmit_Data(enc_dash,P,num_runs);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 7 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 8/9/10 *
@@ -434,11 +449,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
enc[i][P.my_num()] = keys[i].enc;
}
#ifdef VERBOSE_KEYGEN
cout << "Done Step 8/9/10 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 11 *
@@ -448,11 +465,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
else
Transmit_Data(enc,P,num_runs);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 11 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 12 *
@@ -460,11 +479,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
for (int i=0; i<num_runs; i++)
keys[i].sum_enc(enc[i]);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 12 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 13/14 *
@@ -472,11 +493,13 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
int challenge=Open_Challenge(e,Open_e,Comm_e,P,num_runs);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 13/14 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
/***********************
* Step 15 *
@@ -489,7 +512,10 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
/* Now Open All Bar The Challenge Run */
for (int i = 0; i < num_runs; i++)
{ if (i != challenge)
{ cout << "Checking run " << i << endl;
{
#ifdef VERBOSE_KEYGEN
cout << "Checking run " << i << endl;
#endif
check_randomness(seeds[i], keys[i].enc, keys[i].pk, keys[i].enc_dash, P.num_players());
}
}
@@ -497,15 +523,19 @@ void Run_Gen_Protocol(FHE_PK& pk,FHE_SK& sk,const Player& P,int num_runs,
// Set the key to the chosen run's output
keys[challenge].finalize(pk, sk);
#ifdef VERBOSE_KEYGEN
cout << "Done Step 15 " << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
start=clock();
#endif
P.Check_Broadcast();
#ifdef VERBOSE_KEYGEN
cout << "Broadcast check all passed" << endl;
stop=clock();
cout << "\t\tTime = " << (stop-start)/CLOCKS_PER_SEC << " seconds " << endl;
#endif
}

View File

@@ -15,13 +15,13 @@ Prover<FD,U>::Prover(Proof& proof, const FD& FieldD) :
s.resize(proof.V, proof.pk->get_params());
y.resize(proof.V, FieldD);
#ifdef LESS_ALLOC_MORE_MEM
s.allocate_slots(bigint(1) << proof.B_rand_length);
y.allocate_slots(bigint(1) << proof.B_plain_length);
t = s[0];
z = y[0];
// extra limb to prevent reallocation
t.allocate_slots(bigint(1) << (proof.B_rand_length + 64));
z.allocate_slots(bigint(1) << (proof.B_plain_length + 64));
s.allocate_slots(bigint(1) << proof.B_rand_length);
y.allocate_slots(bigint(1) << proof.B_plain_length);
#endif
}

View File

@@ -299,12 +299,6 @@ void MachineBase::run()
<< timer.elapsed() << " seconds" << endl;
cout << "CPU time: " << cpu_timer.elapsed() << endl;
extern unsigned long long sent_amount, sent_counter;
cout << "Data sent = " << sent_amount << " bytes in " << sent_counter
<< " calls, ";
cout << sent_amount / sent_counter / N.num_players() << " bytes per call"
<< endl;
cout << "Time: " << timer.elapsed() << endl;
cout << "Throughput: " << total / timer.elapsed() << endl;
mult_performance();

View File

@@ -52,46 +52,59 @@ public:
}
};
class MaliciousRepSecret : public ReplicatedSecret<MaliciousRepSecret>
template<class U>
class MalRepSecretBase : public ReplicatedSecret<U>
{
typedef ReplicatedSecret<MaliciousRepSecret> super;
typedef ReplicatedSecret<U> super;
public:
typedef Memory<MaliciousRepSecret> DynamicMemory;
typedef Memory<U> DynamicMemory;
typedef MaliciousRepMC<MaliciousRepSecret> MC;
typedef MaliciousRepMC<U> MC;
typedef MC MAC_Check;
typedef Beaver<MaliciousRepSecret> Protocol;
typedef ReplicatedInput<MaliciousRepSecret> Input;
typedef RepPrep<MaliciousRepSecret> LivePrep;
typedef ReplicatedInput<U> Input;
typedef RepPrep<U> LivePrep;
typedef MaliciousRepSecret part_type;
typedef MaliciousRepSecret whole_type;
typedef SmallMalRepSecret small_type;
typedef U part_type;
typedef U whole_type;
static const bool expensive_triples = true;
static MC* new_mc(mac_key_type)
static MC* new_mc(BitVec)
{
try
{
if (ThreadMaster<MaliciousRepSecret>::s().machine.more_comm_less_comp)
return new CommMaliciousRepMC<MaliciousRepSecret>;
if (ThreadMaster<U>::s().machine.more_comm_less_comp)
return new CommMaliciousRepMC<U>;
}
catch(no_singleton& e)
{
}
return new HashMaliciousRepMC<MaliciousRepSecret>;
return new HashMaliciousRepMC<U>;
}
static MaliciousRepSecret constant(const BitVec& other, int my_num, const BitVec& alphai)
static U constant(const BitVec& other, int my_num, const BitVec& alphai)
{
(void) my_num, (void) alphai;
return other;
}
MalRepSecretBase() {}
template<class T>
MalRepSecretBase(const T& other) : super(other) {}
};
class MaliciousRepSecret : public MalRepSecretBase<MaliciousRepSecret>
{
typedef MaliciousRepSecret This;
typedef MalRepSecretBase<This> super;
public:
typedef Beaver<MaliciousRepSecret> Protocol;
typedef SmallMalRepSecret small_type;
MaliciousRepSecret() {}
template<class T>
MaliciousRepSecret(const T& other) : super(other) {}

120
GC/PostSacriBin.cpp Normal file
View File

@@ -0,0 +1,120 @@
/*
* PostSacriBin.cpp
*
*/
#include "PostSacriBin.h"
#include "Processor/Processor.h"
#include "Protocols/Replicated.hpp"
#include "Protocols/MaliciousRepMC.hpp"
#include "ShareSecret.hpp"
namespace GC
{
/**
 * Build the protocol instance for player P. Both the ReplicatedBase
 * part and the inner `honest` multiplication protocol are created
 * from the same player object.
 */
PostSacriBin::PostSacriBin(Player& P) :
        ReplicatedBase(P),
        honest(P)
{
}
/**
 * Refuse to go away silently while multiplications recorded by
 * prepare_mul() are still waiting for check(): print a message and
 * terminate the program in that case.
 */
PostSacriBin::~PostSacriBin()
{
    if (inputs.empty())
        return;

    cerr << "unchecked ANDs" << endl;
    terminate();
}
/**
 * Convenience overload: start a multiplication round using the
 * preprocessing (DataF) and MAC check (MC) of the given sub-processor.
 * A sub-processor is mandatory here.
 */
void PostSacriBin::init_mul(SubProcessor<T>* proc)
{
    assert(proc != 0);
    auto& processor = *proc;
    init_mul(processor.DataF, processor.MC);
}
/**
 * Start a multiplication round. Both arguments are ignored.
 * Once the number of recorded, still unchecked multiplications has
 * reached the configured batch size, they are verified with check()
 * before the inner protocol is initialized.
 */
void PostSacriBin::init_mul(Preprocessing<T>&, T::MC&)
{
    const int n_pending = (int) inputs.size();
    const bool batch_full = n_pending >= OnlineOptions::singleton.batch_size;
    if (batch_full)
        check();

    honest.init_mul();
}
/**
 * Queue the multiplication of x and y over n bits in the inner
 * protocol and remember both operands, masked with mask(n), for the
 * later check(). Always returns a value-initialized clear value.
 */
PostSacriBin::T::clear PostSacriBin::prepare_mul(const T& x, const T& y, int n)
{
    honest.prepare_mul(x, y, n);

    array<T, 2> operands = {{x.mask(n), y.mask(n)}};
    inputs.push_back(operands);

    return {};
}
// Communication step of a multiplication round: forwarded unchanged to
// the inner `honest` protocol.
void PostSacriBin::exchange()
{
honest.exchange();
}
/**
 * Fetch the next product from the inner protocol, record it together
 * with its bit length n for the later check(), and hand it to the caller.
 */
PostSacriBin::T PostSacriBin::finalize_mul(int n)
{
    auto product = honest.finalize_mul(n);
    outputs.push_back(pair<T, int>(product, n));
    return product;
}
void PostSacriBin::check()
{
vector<array<T, 3>> to_check;
assert(inputs.size() == outputs.size());
for (size_t i = 0; i < inputs.size(); i++)
to_check.push_back({{inputs[i][0], inputs[i][1], outputs[i].first}});
GlobalPRNG G(P);
for (size_t i = 0; i < inputs.size(); i++)
to_check.push_back(get_d1_triple(G, outputs[i].second));
HashMaliciousRepMC<T> MC;
vector<array<T, 3>> _(N);
TripleShuffleSacrifice<T>(2, 6).triple_sacrifice(_, to_check, P, MC, 0, inputs.size());
MC.Check(P);
inputs.clear();
outputs.clear();
}
/**
 * Return an n_bits-wide triple taken from a randomly chosen entry of
 * the d1 pool.
 *
 * The pool is first topped up to N entries of T::N_BITS bits each.
 * The chosen entry gives up its mask(n_bits) part, is shifted left by
 * n_bits and refilled by XORing in a fresh n_bits-wide triple.
 *
 * NOTE(review): bits that were handed out remain in the entry, moved up
 * by n_bits -- confirm that a later request with a larger n_bits cannot
 * read them again.
 */
array<PostSacriBin::T, 3> PostSacriBin::get_d1_triple(GlobalPRNG& G, int n_bits)
{
    while (d1.size() < N)
        d1.push_back(get_d2_triple(T::N_BITS));

    int pick = G.get_uint(N);
    auto& slot = d1.at(pick);

    auto taken = slot.mask(n_bits);
    slot <<= n_bits;
    slot ^= get_d2_triple(n_bits);

    array<T, 3> picked({{taken[0], taken[1], taken[2]}});
    return picked;
}
// Return an n_bits-wide triple by forwarding to get_triple_no_count().
// NOTE(review): presumably this is served from the d2 buffer through
// get() -- confirm against ShiftableTripleBuffer.
array<PostSacriBin::T, 3> PostSacriBin::get_d2_triple(int n_bits)
{
return get_triple_no_count(n_bits);
}
/**
 * Write the three components of the next d2 triple to res[0..2].
 * Only DATA_TRIPLE is supported.
 *
 * When d2 has run dry, 2 * N + 6 candidate triples of T::N_BITS bits
 * are generated with a Replicated<T> instance and passed through
 * TripleShuffleSacrifice(2, 6), which is expected to leave exactly N
 * triples in d2.
 */
void PostSacriBin::get(Dtype type, T* res)
{
    assert(type == DATA_TRIPLE);

    if (d2.empty())
    {
        TripleShuffleSacrifice<T> sacrifice(2, 6);
        vector<array<T, 3>> candidates;

        // optimistic triple generation
        Replicated<T> generator(P);
        generate_triples(candidates, 2 * N + 6, &generator, T::N_BITS);

        HashMaliciousRepMC<T> MC;
        sacrifice.triple_sacrifice(d2, candidates, P, MC, 0);
        MC.Check(P);
        assert(d2.size() == N);
    }

    auto& top = d2.back();
    for (int k = 0; k < 3; k++)
        res[k] = top[k];
    d2.pop_back();
}
} /* namespace GC */

52
GC/PostSacriBin.h Normal file
View File

@@ -0,0 +1,52 @@
/*
* PostSacriBin.h
*
*/
#ifndef GC_POSTSACRIBIN_H_
#define GC_POSTSACRIBIN_H_
#include "PostSacriSecret.h"
#include "Protocols/Replicated.h"
#include "ShiftableTripleBuffer.h"
namespace GC
{
// Binary multiplication protocol that runs the plain Replicated protocol
// first and verifies the results afterwards in batches (see check()).
// ShiftableTripleBuffer is inherited privately and provides the triple
// access used by get_d2_triple().
class PostSacriBin : public ReplicatedBase,
public ProtocolBase<PostSacriSecret>,
ShiftableTripleBuffer<PostSacriSecret>
{
typedef PostSacriSecret T;
// inner protocol that performs the actual multiplications
Replicated<T> honest;
// masked operand pairs recorded by prepare_mul(), not yet checked
vector<array<T, 2>> inputs;
// results recorded by finalize_mul() together with their bit length
vector<pair<T, int>> outputs;
// as in Araki et al. (S&P'17)
// d1: pool that check triples are drawn from at random
vector<FixedVec<T, 3>> d1;
// d2: triples that went through the sacrifice in get()
vector<array<T, 3>> d2;
// take n_bits from a random d1 entry and refill that entry
array<T, 3> get_d1_triple(GlobalPRNG& G, int n_bits);
// fetch an n_bits-wide triple via get_triple_no_count()
array<T, 3> get_d2_triple(int n_bits);
// pop one triple from d2 into res[0..2], refilling d2 when empty
void get(Dtype type, T* res);
// size of the d1 pool and of one d2 batch
const size_t N = 1 << 20;
public:
PostSacriBin(Player& P);
// terminates the program if multiplications are left unchecked
~PostSacriBin();
// both arguments are unused; runs check() once the batch size is reached
void init_mul(Preprocessing<T>&, T::MC&);
void init_mul(SubProcessor<T>* proc);
T::clear prepare_mul(const T& x, const T& y, int n = -1);
void exchange();
T finalize_mul(int n = -1);
// verify all recorded multiplications and clear the records
void check();
};
} /* namespace GC */
#endif /* GC_POSTSACRIBIN_H_ */

37
GC/PostSacriSecret.h Normal file
View File

@@ -0,0 +1,37 @@
/*
* PostSacriSecret.h
*
*/
#ifndef GC_POSTSACRISECRET_H_
#define GC_POSTSACRISECRET_H_
#include "MaliciousRepSecret.h"
namespace GC
{
class PostSacriBin;
/**
 * Share type that selects PostSacriBin as its protocol. Everything
 * else is inherited from MalRepSecretBase.
 */
class PostSacriSecret : public MalRepSecretBase<PostSacriSecret>
{
    using This = PostSacriSecret;
    using super = MalRepSecretBase<This>;

public:
    using Protocol = PostSacriBin;

    PostSacriSecret() {}

    // generic converting constructor, handed on to the base class
    template<class T>
    PostSacriSecret(const T& other) : super(other) {}
};
}
#endif

View File

@@ -14,6 +14,8 @@
#include "Tools/callgrind.h"
#include "Processor/Instruction.hpp"
namespace GC
{

View File

@@ -22,7 +22,7 @@ class ShareParty : public ThreadMaster<T>
{
static ShareParty<T>* singleton;
ez::ezOptionParser opt;
ez::ezOptionParser& opt;
OnlineOptions online_opts;
public:
@@ -30,7 +30,8 @@ public:
typename T::mac_key_type mac_key;
ShareParty(int argc, const char** argv, int default_batch_size = 0);
ShareParty(int argc, const char** argv, ez::ezOptionParser& opt,
int default_batch_size = 0);
Thread<T>* new_thread(int i);

View File

@@ -17,6 +17,10 @@
#include "ShareThread.hpp"
#include "RepPrep.hpp"
#include "ThreadMaster.hpp"
#include "Thread.hpp"
#include "ShareSecret.hpp"
#include "Protocols/Replicated.hpp"
#include "Protocols/ReplicatedPrep.hpp"
#include "Protocols/MaliciousRepMC.hpp"
@@ -29,16 +33,31 @@ template<class T>
ShareParty<T>* ShareParty<T>::singleton = 0;
template<class T>
ShareParty<T>::ShareParty(int argc, const char** argv, int default_batch_size) :
ThreadMaster<T>(online_opts), online_opts(opt, argc, argv,
void simple_binary_main(int argc, const char** argv, int default_batch_size = 0)
{
ez::ezOptionParser opt;
ShareParty<T>(argc, argv, opt, default_batch_size);
}
template<class T>
ShareParty<T>::ShareParty(int argc, const char** argv, ez::ezOptionParser& opt,
int default_batch_size) :
ThreadMaster<T>(online_opts), opt(opt),
online_opts(this->opt, argc, argv,
default_batch_size)
{
if (singleton)
throw runtime_error("there can only be one");
singleton = this;
int nplayers = 0;
opt.parse(argc, argv);
if (opt.get("-N"))
opt.get("-N")->getInt(nplayers);
opt.resetArgs();
NetworkOptionsWithNumber network_opts(opt, argc, argv,
T::dishonest_majority ? 2 : 3, T::variable_players);
nplayers > 0 ? nplayers : (T::dishonest_majority ? 2 : 3),
T::variable_players and nplayers == 0);
if (T::dishonest_majority)
opt.add(
"", // Default.

View File

@@ -25,7 +25,7 @@ union matrix32x8
void transpose(square64& output, int x, int y)
{
#ifdef __AVX2__
#if defined(__AVX2__) || !defined(__x86_64__)
if (cpu_has_avx2())
{
for (int j = 0; j < 8; j++)
@@ -66,7 +66,7 @@ case I: \
void zip(int chunk_size, __m256i& lows, __m256i& highs,
const __m256i& a, const __m256i& b)
{
#ifdef __AVX2__
#if defined(__AVX2__) || !defined(__x86_64__)
if (cpu_has_avx2())
{
switch (chunk_size)

View File

@@ -6,10 +6,10 @@
#ifndef GC_SQUARE64_H_
#define GC_SQUARE64_H_
#include <immintrin.h>
#include <string.h>
#include <cstdint>
#include "Tools/int.h"
#include "Tools/intrinsics.h"
union square64
{

View File

@@ -17,5 +17,7 @@
int main(int argc, const char** argv)
{
gf2n_short::init_field(40);
GC::ShareParty<GC::CcdSecret<gf2n_short>>(argc, argv);
ez::ezOptionParser opt;
ShamirOptions::singleton = {opt, argc, argv};
GC::ShareParty<GC::CcdSecret<gf2n_short>>(argc, argv, opt);
}

View File

@@ -18,5 +18,7 @@
int main(int argc, const char** argv)
{
gf2n_short::init_field(40);
GC::ShareParty<GC::MaliciousCcdSecret<gf2n_short>>(argc, argv);
ez::ezOptionParser opt;
ShamirOptions::singleton = {opt, argc, argv};
GC::ShareParty<GC::MaliciousCcdSecret<gf2n_short>>(argc, argv, opt);
}

View File

@@ -21,5 +21,5 @@
int main(int argc, const char** argv)
{
GC::ShareParty<GC::MaliciousRepSecret>(argc, argv);
GC::simple_binary_main<GC::MaliciousRepSecret>(argc, argv);
}

View File

@@ -0,0 +1,14 @@
/*
* abfllnow-party.cpp
*
*/
#include "GC/PostSacriBin.h"
#include "GC/PostSacriSecret.h"
#include "GC/ShareParty.hpp"
// Entry point of the party binary for GC::PostSacriSecret: the command
// line is passed unchanged to GC::simple_binary_main.
int main(int argc, const char** argv)
{
GC::simple_binary_main<GC::PostSacriSecret>(argc, argv);
}

View File

@@ -20,5 +20,5 @@
int main(int argc, const char** argv)
{
GC::ShareParty<GC::SemiHonestRepSecret>(argc, argv);
GC::simple_binary_main<GC::SemiHonestRepSecret>(argc, argv);
}

View File

@@ -23,5 +23,5 @@
int main(int argc, const char** argv)
{
GC::ShareParty<GC::SemiSecret>(argc, argv);
GC::simple_binary_main<GC::SemiSecret>(argc, argv);
}

View File

@@ -27,5 +27,5 @@
int main(int argc, const char** argv)
{
gf2n_short::init_field(40);
GC::ShareParty<GC::TinierSecret<gf2n_short>>(argc, argv, 1000);
GC::simple_binary_main<GC::TinierSecret<gf2n_short>>(argc, argv, 1000);
}

View File

@@ -26,5 +26,5 @@
int main(int argc, const char** argv)
{
GC::ShareParty<GC::TinySecret<40>>(argc, argv, 1000);
GC::simple_binary_main<GC::TinySecret<40>>(argc, argv, 1000);
}

View File

@@ -26,7 +26,12 @@ VM = $(PROCESSOR) $(COMMON) GC/square64.o GC/Instruction.o OT/OTTripleSetup.o OT
LIB = libSPDZ.a
LIBRELEASE = librelease.a
ifeq ($(AVX_OT), 0)
LIBSIMPLEOT = ECDSA/P256Element.o
else
LIBSIMPLEOT = SimpleOT/libsimpleot.a
endif
# used for dependency generation
OBJS = $(BMR) $(FHEOFFLINE) $(TINYOTOFFLINE) $(YAO) $(COMPLETE) $(patsubst %.cpp,%.o,$(wildcard Machines/*.cpp Utils/*.cpp))
@@ -47,7 +52,6 @@ binary: rep-bin yao semi-bin-party.x tinier-party.x tiny-party.x ccd-party.x mal
ifeq ($(USE_NTL),1)
all: overdrive she-offline
gear: cowgear-party.x chaigear-party.x lowgear-party.x highgear-party.x
arithmetic: hemi-party.x soho-party.x gear
endif
@@ -73,13 +77,14 @@ yao: yao-party.x
she-offline: Check-Offline.x spdz2-offline.x
overdrive: simple-offline.x pairwise-offline.x cnc-offline.x
overdrive: simple-offline.x pairwise-offline.x cnc-offline.x gear
gear: cowgear-party.x chaigear-party.x lowgear-party.x highgear-party.x
rep-field: malicious-rep-field-party.x replicated-field-party.x ps-rep-field-party.x
rep-ring: replicated-ring-party.x brain-party.x malicious-rep-ring-party.x ps-rep-ring-party.x rep4-ring-party.x
rep-bin: replicated-bin-party.x malicious-rep-bin-party.x Fake-Offline.x
rep-bin: replicated-bin-party.x malicious-rep-bin-party.x ps-rep-bin-party.x Fake-Offline.x
replicated: rep-field rep-ring rep-bin
@@ -96,6 +101,10 @@ else
tldr: mpir
endif
ifeq ($(MACHINE), aarch64)
tldr: simde/simde
endif
shamir: shamir-party.x malicious-shamir-party.x galois-degree.x
sy: sy-rep-field-party.x sy-rep-ring-party.x sy-shamir-party.x
@@ -107,10 +116,10 @@ $(LIBRELEASE): Protocols/MalRepRingOptions.o $(PROCESSOR) $(COMMON) $(OT) $(GC)
$(AR) -csr $@ $^
static/%.x: Machines/%.o $(LIBRELEASE) $(LIBSIMPLEOT)
$(CXX) $(CFLAGS) -o $@ $^ $(LIBRELEASE) $(LIBSIMPLEOT) -Wl,-Map=$<.map -Wl,-Bstatic -static-libgcc -static-libstdc++ $(BOOST) $(LDLIBS) -Wl,-Bdynamic -ldl
$(CXX) $(CFLAGS) -o $@ $^ -Wl,-Map=$<.map -Wl,-Bstatic -static-libgcc -static-libstdc++ $(LIBRELEASE) $(LIBSIMPLEOT) $(BOOST) $(LDLIBS) -Wl,-Bdynamic -ldl
static/%.x: ECDSA/%.o ECDSA/P256Element.o $(VM) $(OT) $(LIBSIMPLEOT)
$(CXX) $(CFLAGS) -o $@ $^ -Wl,-Map=$<.map -Wl,-Bstatic -static-libgcc -static-libstdc++ $(BOOST) $(LDLIBS) $(ECLIB) -Wl,-Bdynamic -ldl
$(CXX) $(CFLAGS) -o $@ $^ -Wl,-Map=$<.map -Wl,-Bstatic -static-libgcc -static-libstdc++ $(BOOST) $(LDLIBS) -Wl,-Bdynamic -ldl
static-dir:
@ mkdir static 2> /dev/null; true
@@ -118,7 +127,7 @@ static-dir:
static-release: static-dir $(patsubst Machines/%.cpp, static/%.x, $(wildcard Machines/*-party.cpp)) static/emulate.x
Fake-ECDSA.x: ECDSA/Fake-ECDSA.cpp ECDSA/P256Element.o $(COMMON) Processor/PrepBase.o
$(CXX) -o $@ $^ $(CFLAGS) $(LDLIBS) $(ECLIB)
$(CXX) -o $@ $^ $(CFLAGS) $(LDLIBS)
Check-Offline.x: $(PROCESSOR)
@@ -167,14 +176,24 @@ secure.x: Utils/secure.o
%.x: Machines/%.o $(VM) OT/OTTripleSetup.o OT/BaseOT.o $(LIBSIMPLEOT)
$(CXX) -o $@ $(CFLAGS) $^ $(LDLIBS)
%gear-party.x: Machines/%gear-party.o $(VM) OT/OTTripleSetup.o OT/BaseOT.o $(LIBSIMPLEOT)
$(CXX) -o $@ $(CFLAGS) $^ $(LDLIBS) -lntl
hemi-party.x: Machines/hemi-party.o $(VM)
$(CXX) -o $@ $(CFLAGS) $^ $(LDLIBS) -lntl
soho-party.x: Machines/soho-party.o $(VM)
$(CXX) -o $@ $(CFLAGS) $^ $(LDLIBS) -lntl
%-ecdsa-party.x: ECDSA/%-ecdsa-party.o ECDSA/P256Element.o $(VM)
$(CXX) -o $@ $(CFLAGS) $^ $(LDLIBS) $(ECLIB)
$(CXX) -o $@ $(CFLAGS) $^ $(LDLIBS)
replicated-bin-party.x: GC/square64.o
replicated-ring-party.x: GC/square64.o
replicated-field-party.x: GC/square64.o
brain-party.x: GC/square64.o
malicious-rep-bin-party.x: GC/square64.o
ps-rep-bin-party.x: GC/PostSacriBin.o
semi-bin-party.x: $(VM) $(OT) GC/SemiSecret.o GC/SemiPrep.o GC/square64.o
tiny-party.x: $(OT)
tinier-party.x: $(OT)
@@ -220,6 +239,7 @@ static/semi-bmr-party.x: $(BMR)
static/real-bmr-party.x: $(BMR)
static/bmr-program-party.x: $(BMR)
ifeq ($(AVX_OT), 1)
$(LIBSIMPLEOT): SimpleOT/Makefile
$(MAKE) -C SimpleOT
@@ -227,6 +247,7 @@ OT/BaseOT.o: SimpleOT/Makefile
SimpleOT/Makefile:
git submodule update --init SimpleOT
endif
.PHONY: Programs/Circuits
Programs/Circuits:
@@ -259,5 +280,8 @@ mac-setup:
-echo MY_LDLIBS += -L/usr/local/opt/openssl/lib >> CONFIG.mine
-echo USE_NTL = 1 >> CONFIG.mine
simde/simde:
git submodule update --init simde
clean:
-rm -f */*.o *.o */*.d *.d *.x core.* *.a gmon.out */*/*.o static/*.x

View File

@@ -277,6 +277,12 @@ public:
return res;
}
// In-place left shift: replaces this vector by (*this << i) and returns
// a reference to it.
FixedVec<T, L>& operator<<=(int i)
{
*this = *this << i;
return *this;
}
FixedVec<T, L>& operator>>=(int i)
{
*this = *this >> i;

View File

@@ -143,19 +143,6 @@ class Integer : public IntBase<long>
friend unsigned int& operator+=(unsigned int& x, const Integer& other) { return x += other.a; }
long operator-() const { return -a; }
void add(const Integer& x, const Integer& y) { *this = x + y; }
void sub(const Integer& x, const Integer& y) { *this = x - y; }
void mul(const Integer& x, const Integer& y) { *this = x * y; }
void mul(const Integer& x) { *this = *this * x; }
void AND(const Integer& x, const Integer& y) { *this = x & y; }
void OR(const Integer& x, const Integer& y) { *this = x | y; }
void XOR(const Integer& x, const Integer& y) { *this = x ^ y; }
void SHL(const Integer& x, const Integer& y) { *this = x << y; }
// unsigned shift for Mod2m
void SHR(const Integer& x, const Integer& y) { *this = (unsigned long)x.a >> y.a; }
};
inline string to_string(const Integer& x)

View File

@@ -64,7 +64,9 @@ void Zp_Data::init(const bigint& p,bool mont)
void Zp_Data::Mont_Mult(mp_limb_t* z,const mp_limb_t* x,const mp_limb_t* y,int t) const
{
mp_limb_t ans[2*MAX_MOD_SZ+1],u;
mp_limb_t ans[2 * MAX_MOD_SZ + 1], u, yy[t + 1];
inline_mpn_copyi(yy, y, t);
yy[t] = 0;
// First loop
u=x[0]*y[0]*pi;
ans[t] = mpn_mul_1(ans,y,t,x[0]);
@@ -73,8 +75,8 @@ void Zp_Data::Mont_Mult(mp_limb_t* z,const mp_limb_t* x,const mp_limb_t* y,int t
{ // u=(ans0+xi*y0)*pd
u=(ans[i]+x[i]*y[0])*pi;
// ans=ans+xi*y+u*pr
ans[t+i]+=mpn_addmul_1(ans+i,y,t,x[i]);
ans[t+i+1]=mpn_addmul_1(ans+i,prA,t+1,u);
ans[t+i+1]=mpn_addmul_1(ans+i,yy,t+1,x[i]);
ans[t+i+1]+=mpn_addmul_1(ans+i,prA,t+1,u);
}
// if (ans>=pr) { ans=z-pr; }
// else { z=ans; }

View File

@@ -13,8 +13,8 @@
#include "Math/bigint.h"
#include "Math/mpn_fixed.h"
#include "Tools/random.h"
#include "Tools/intrinsics.h"
#include <smmintrin.h>
#include <iostream>
using namespace std;
@@ -43,6 +43,8 @@ class Zp_Data
void Mont_Mult(mp_limb_t* z,const mp_limb_t* x,const mp_limb_t* y, int t) const;
void Mont_Mult_variable(mp_limb_t* z,const mp_limb_t* x,const mp_limb_t* y) const
{ Mont_Mult(z, x, y, t); }
void Mont_Mult_max(mp_limb_t* z, const mp_limb_t* x, const mp_limb_t* y,
int max_t) const;
public:
@@ -125,7 +127,7 @@ inline void Zp_Data::Add<0>(mp_limb_t* ans,const mp_limb_t* x,const mp_limb_t* y
template<>
inline void Zp_Data::Add<1>(mp_limb_t* ans,const mp_limb_t* x,const mp_limb_t* y) const
{
#ifdef __clang__
#if defined(__clang__) || !defined(__x86_64__)
Add<0>(ans, x, y);
#else
*ans = *x + *y;
@@ -139,7 +141,7 @@ inline void Zp_Data::Add<1>(mp_limb_t* ans,const mp_limb_t* x,const mp_limb_t* y
template<>
inline void Zp_Data::Add<2>(mp_limb_t* ans,const mp_limb_t* x,const mp_limb_t* y) const
{
#ifdef __clang__
#if defined(__clang__) || !defined(__x86_64__)
Add<0>(ans, x, y);
#else
__uint128_t a, b, p;
@@ -229,7 +231,7 @@ inline void Zp_Data::Mont_Mult_(mp_limb_t* z,const mp_limb_t* x,const mp_limb_t*
{ // u=(ans0+xi*y0)*pd
u=(ans[i]+x[i]*y[0])*pi;
// ans=ans+xi*y+u*pr
mpn_addmul_1_fixed_<T + 1, T>(ans+i,y,x[i]);
mpn_addmul_1_fixed_<T + 2, T>(ans+i,y,x[i]);
mpn_addmul_1_fixed_<T + 2, T + 1>(ans+i,prA,u);
}
// if (ans>=pr) { ans=z-pr; }
@@ -276,4 +278,11 @@ inline void Zp_Data::Mont_Mult(mp_limb_t* z,const mp_limb_t* x,const mp_limb_t*
}
}
/**
 * Montgomery multiplication z = x * y with a sanity check on the
 * operand size: asserts that the limb count t of this modulus does not
 * exceed max_t before delegating to Mont_Mult(z, x, y).
 *
 * NOTE(review): callers pass the limb capacity of their buffers as
 * max_t (e.g. L for modp_<L>) -- confirm for new call sites.
 */
inline void Zp_Data::Mont_Mult_max(mp_limb_t* z, const mp_limb_t* x,
        const mp_limb_t* y, int max_t) const
{
    // max_t is only read by the assertion below; mark it as used so that
    // builds with NDEBUG do not warn about an unused parameter
    (void) max_t;
    assert(t <= max_t);
    Mont_Mult(z, x, y);
}
#endif

View File

@@ -153,6 +153,11 @@ bigint::bigint(const gfpvar& other)
to_bigint(*this, other.get(), other.get_ZpD());
}
/**
 * Construct a non-negative integer from n_limbs limbs stored at data,
 * least significant limb first.
 */
bigint::bigint(const mp_limb_t* data, size_t n_limbs)
{
    // order -1: least significant limb first
    // size: taken from mp_limb_t instead of assuming 8-byte limbs
    // endian 0: limbs are native machine words, so use host byte order
    // nails 0: all bits of every limb are used
    mpz_import(get_mpz_t(), n_limbs, -1, sizeof(mp_limb_t), 0, 0, data);
}
string to_string(const bigint& x)
{
stringstream ss;

View File

@@ -63,6 +63,7 @@ public:
bigint(const fixint<L>& x) : bigint(typename fixint<L>::super(x)) {}
bigint(const Integer& x);
bigint(const GC::Clear& x);
bigint(const mp_limb_t* data, size_t n_limbs);
bigint& operator=(int n);
bigint& operator=(long n);
@@ -75,6 +76,11 @@ public:
template<int K>
bigint& operator=(const SignedZ2<K>& x);
template<int X, int L>
bigint& from_signed(const gfp_<X, L>& other);
template<class T>
bigint& from_signed(const T& other);
void allocate_slots(const bigint& x) { *this = x; }
int get_min_alloc() { return get_mpz_t()->_mp_alloc; }

View File

@@ -9,12 +9,27 @@
#include "bigint.h"
#include "Integer.h"
// Overload for gfp_ elements: fills this bigint via to_signed_bigint()
// and returns a reference to it.
// NOTE(review): presumably this yields the signed representative of the
// field element -- confirm against to_signed_bigint().
template<int X, int L>
bigint& bigint::from_signed(const gfp_<X, L>& other)
{
to_signed_bigint(*this, other);
return *this;
}
// Generic fallback for all other types: plain assignment from other,
// returning a reference to this bigint.
template<class T>
bigint& bigint::from_signed(const T& other)
{
*this = other;
return *this;
}
template<class T>
mpf_class bigint::get_float(T v, T p, T z, T s)
{
// MPIR can't handle more precision in exponent
Integer exp = Integer(p, 31).get();
bigint tmp = v;
bigint tmp;
tmp.from_signed(v);
mpf_class res = tmp;
if (exp > 0)
mpf_mul_2exp(res.get_mpf_t(), res.get_mpf_t(), exp.get());

View File

@@ -11,7 +11,7 @@
template<int L>
class fixint : public SignedZ2<64 * (L + 1)>
{
static const int OVERFLOW = 60;
static const int N_OVERFLOW = 60;
public:
typedef SignedZ2<64 * (L + 1)> super;
@@ -24,7 +24,7 @@ public:
fixint(const T& other) :
super(other)
{
auto check = mp_limb_signed_t(this->a[this->N_WORDS - 1]) >> OVERFLOW;
auto check = mp_limb_signed_t(this->a[this->N_WORDS - 1]) >> N_OVERFLOW;
assert(check == 0 or check == -1);
}
@@ -70,10 +70,10 @@ public:
void allocate_slots(const T& limit)
{
int n_bits = this->size_in_bits();
if (numBits(limit) - OVERFLOW > n_bits)
if (numBits(limit) - N_OVERFLOW > n_bits)
{
cerr << "cannot hold " << numBits(limit) << " bits, " << n_bits
<< " available" << endl;
cerr << "maybe change N_LIMBS_RAND to at least "
<< ((numBits(limit) - N_OVERFLOW) / 64) << endl;
throw runtime_error("fixed-length integer too small");
}
}

View File

@@ -2,12 +2,10 @@
#include "Math/gf2n.h"
#include "Math/Bit.h"
#include "Tools/intrinsics.h"
#include "Tools/Exceptions.h"
#include <stdint.h>
#include <wmmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
const false_type ValueInterface::characteristic_two;
const false_type ValueInterface::prime_field;
@@ -16,6 +14,9 @@ const false_type ValueInterface::invertible;
const true_type gf2n_short::characteristic_two;
const true_type gf2n_long::characteristic_two;
const true_type gf2n_short::invertible;
const true_type gf2n_long::invertible;
int gf2n_short::n = 0;
int gf2n_short::t1;
int gf2n_short::t2;

View File

@@ -6,12 +6,10 @@
#include "gf2nlong.h"
#include "gf2n.h"
#include "Tools/intrinsics.h"
#include "Tools/Exceptions.h"
#include <stdint.h>
#include <wmmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
bool is_ge(__m128i a, __m128i b)

View File

@@ -12,9 +12,8 @@
#include <iostream>
using namespace std;
#include <smmintrin.h>
#include "Tools/random.h"
#include "Tools/intrinsics.h"
#include "Math/field_types.h"
#include "Math/bigint.h"
@@ -283,7 +282,7 @@ inline __m128i software_clmul(__m128i a, __m128i b, int choice)
template<int choice>
inline __m128i clmul(__m128i a, __m128i b)
{
#ifdef __PCLMUL__
#if defined(__PCLMUL__) || !defined(__x86_64__)
if (cpu_has_pclmul())
{
return _mm_clmulepi64_si128(a, b, choice);

View File

@@ -9,6 +9,9 @@
#include "gfp.hpp"
const true_type gfpvar::invertible;
const true_type gfpvar::prime_field;
Zp_Data gfpvar::ZpD;
string gfpvar::type_string()

View File

@@ -151,7 +151,7 @@ template<int L>
inline void Mul(modp_<L>& ans,const modp_<L>& x,const modp_<L>& y,const Zp_Data& ZpD)
{
if (ZpD.montgomery)
{ ZpD.Mont_Mult(ans.x,x.x,y.x); }
{ ZpD.Mont_Mult_max(ans.x,x.x,y.x,L); }
else
{ //ans.x=(x.x*y.x)%ZpD.pr;
mp_limb_t aa[2*L],q[2*L];

View File

@@ -253,7 +253,7 @@ void Inv(modp_<L>& ans,const modp_<L>& x,const Zp_Data& ZpD)
else
{ for (int i=sz; i<ZpD.t; i++) { ans.x[i]=0; } }
if (ZpD.montgomery)
{ ZpD.Mont_Mult(ans.x,ans.x,ZpD.R3); }
{ ZpD.Mont_Mult_max(ans.x,ans.x,ZpD.R3,L); }
}

View File

@@ -9,10 +9,10 @@
#include <mpir.h>
#include <string.h>
#include <assert.h>
#include <x86intrin.h>
#include "Tools/avx_memcpy.h"
#include "Tools/cpu_support.h"
#include "Tools/intrinsics.h"
inline void inline_mpn_zero(mp_limb_t* x, mp_size_t size)
{
@@ -50,6 +50,7 @@ inline void mpn_add_fixed_n<1>(mp_limb_t* res, const mp_limb_t* x, const mp_limb
*res = *x + *y;
}
#ifdef __x86_64__
template <>
inline void mpn_add_fixed_n<2>(mp_limb_t* res, const mp_limb_t* x, const mp_limb_t* y)
{
@@ -91,6 +92,7 @@ inline void mpn_add_fixed_n<4>(mp_limb_t* res, const mp_limb_t* x, const mp_limb
: "cc"
);
}
#endif
#ifdef __clang__
inline char clang_add_carry(char carryin, unsigned long x, unsigned long y, unsigned long& res)
@@ -133,16 +135,15 @@ mp_limb_t mpn_add_fixed_n_with_carry(mp_limb_t* res, const mp_limb_t* x, const m
inline mp_limb_t mpn_sub_n_borrow(mp_limb_t* res, const mp_limb_t* x, const mp_limb_t* y, int n)
{
#ifndef __clang__
#if __GNUC__ < 7
#if !defined(__clang__) || (__GNUC__ < 7) || !defined(__x86_64__)
// GCC 6 can't handle the code below
return mpn_sub_n(res, x, y, n);
#endif
#endif
#else
char borrow = 0;
for (int i = 0; i < n; i++)
borrow = _subborrow_u64(borrow, x[i], y[i], (unsigned long long*)&res[i]);
return borrow;
#endif
}
template <int N>
@@ -163,6 +164,7 @@ inline void mpn_sub_fixed_n<1>(mp_limb_t* res, const mp_limb_t* x, const mp_limb
*res = *x - *y;
}
#ifdef __x86_64__
template <>
inline mp_limb_t mpn_sub_fixed_n_borrow<1>(mp_limb_t* res, const mp_limb_t* x, const mp_limb_t* y)
{
@@ -235,6 +237,7 @@ inline void mpn_sub_fixed_n<4>(mp_limb_t* res, const mp_limb_t* x, const mp_limb
: "cc"
);
}
#endif
inline void mpn_add_n_use_fixed(mp_limb_t* res, const mp_limb_t* x, const mp_limb_t* y, mp_size_t n)
{
@@ -260,8 +263,8 @@ template <int L, int M, bool ADD>
inline void mpn_addmul_1_fixed__(mp_limb_t* res, const mp_limb_t* y, mp_limb_t x)
{
mp_limb_t lower[L], higher[L];
lower[L - 1] = 0;
higher[L - 1] = 0;
inline_mpn_zero(higher + M, L - M);
inline_mpn_zero(lower + M, L - M);
for (int j = 0; j < M; j++)
lower[j] = _mulx_u64(x, y[j], (long long unsigned*)higher + j);
if (ADD)

View File

@@ -158,7 +158,7 @@ void* Server::start_in_thread(void* server)
}
Server* Server::start_networking(Names& N, int my_num, int nplayers,
string hostname, int portnum)
string hostname, int portnum, int my_port)
{
#ifdef DEBUG_NETWORKING
cerr << "Starting networking for " << my_num << "/" << nplayers
@@ -173,7 +173,7 @@ Server* Server::start_networking(Names& N, int my_num, int nplayers,
pthread_create(&thread, 0, Server::start_in_thread,
server = new Server(nplayers, portnum));
}
N.init(my_num, portnum, Names::DEFAULT_PORT, hostname.c_str());
N.init(my_num, portnum, my_port, hostname.c_str());
if (my_num == 0)
{
pthread_join(thread, 0);

View File

@@ -26,7 +26,8 @@ class Server
public:
static void* start_in_thread(void* server);
static Server* start_networking(Names& N, int my_num, int nplayers,
string hostname = "localhost", int portnum = 9000);
string hostname = "localhost", int portnum = 9000, int my_port =
Names::DEFAULT_PORT);
Server(int argc, char** argv);
Server(int nmachines, int PortnumBase);

View File

@@ -6,6 +6,7 @@
#include <Networking/ServerSocket.h>
#include <Networking/sockets.h>
#include "Tools/Exceptions.h"
#include "Tools/time-func.h"
#include <netinet/ip.h>
#include <netinet/tcp.h>
@@ -46,10 +47,10 @@ ServerSocket::ServerSocket(int Portnum) : portnum(Portnum), thread(0)
gethostname((char*)my_name,512);
/* bind serv information to mysocket
* - Just assume it will eventually wake up
*/
fl=1;
while (fl!=0)
RunningTimer timer;
while (fl!=0 and timer.elapsed() < 600)
{ fl=::bind(main_socket, (struct sockaddr *)&serv, sizeof(struct sockaddr));
if (fl != 0)
{ cerr << "Binding to socket on " << my_name << ":" << Portnum << " failed, trying again in a second ..." << endl;
@@ -136,6 +137,9 @@ void ServerSocket::accept_clients()
struct sockaddr dest;
memset(&dest, 0, sizeof(dest)); /* zero the struct before filling the fields */
int socksize = sizeof(dest);
#ifdef DEBUG_NETWORKING
fprintf(stderr, "Accepting...\n");
#endif
int consocket = accept(main_socket, (struct sockaddr *)&dest, (socklen_t*) &socksize);
if (consocket<0) { error("set_up_socket:accept"); }

View File

@@ -18,6 +18,18 @@
#endif
// Copy n_bytes bytes from in to out for lengths of 1 to 8 bytes.
// The run-time length is mapped onto the fixed-size avx_memcpy<N>
// instantiations; any other length (including 0) raises invalid_length.
inline void short_memcpy(void* out, void* in, size_t n_bytes)
{
    switch (n_bytes)
    {
    case 1: avx_memcpy<1>(out, in); return;
    case 2: avx_memcpy<2>(out, in); return;
    case 3: avx_memcpy<3>(out, in); return;
    case 4: avx_memcpy<4>(out, in); return;
    case 5: avx_memcpy<5>(out, in); return;
    case 6: avx_memcpy<6>(out, in); return;
    case 7: avx_memcpy<7>(out, in); return;
    case 8: avx_memcpy<8>(out, in); return;
    default:
        break;
    }
    // only reached when n_bytes is not in 1..8
    throw invalid_length("length outside range");
}
inline void encode_length(octet *buff, size_t len, size_t n_bytes)
{
if (n_bytes > 8)
@@ -31,7 +43,7 @@ inline void encode_length(octet *buff, size_t len, size_t n_bytes)
}
// use little-endian for optimization
uint64_t tmp = htole64(len);
avx_memcpy(buff, (void*)&tmp, n_bytes);
short_memcpy(buff, (void*)&tmp, n_bytes);
}
inline size_t decode_length(octet *buff, size_t n_bytes)
@@ -39,7 +51,7 @@ inline size_t decode_length(octet *buff, size_t n_bytes)
if (n_bytes > 8)
throw invalid_length("length field cannot be more than 64 bits");
uint64_t tmp = 0;
avx_memcpy((void*)&tmp, buff, n_bytes);
short_memcpy((void*)&tmp, buff, n_bytes);
return le64toh(tmp);
}

View File

@@ -9,23 +9,12 @@ using namespace std;
void error(const char *str)
{
int old_errno = errno;
char err[1000];
gethostname(err,1000);
strcat(err," : ");
strcat(err,str);
perror(err);
throw bad_value();
}
void error(const char *str1,const char *str2)
{
char err[1000];
gethostname(err,1000);
strcat(err," : ");
strcat(err,str1);
strcat(err,str2);
perror(err);
throw bad_value();
throw runtime_error(string() + err + " : " + strerror(old_errno));
}
void set_up_client_socket(int& mysocket,const char* hostname,int Portnum)
@@ -35,7 +24,7 @@ void set_up_client_socket(int& mysocket,const char* hostname,int Portnum)
hints.ai_family = AF_INET;
hints.ai_flags = AI_CANONNAME;
octet my_name[512];
char my_name[512];
memset(my_name,0,512*sizeof(octet));
gethostname((char*)my_name,512);
@@ -88,36 +77,39 @@ void set_up_client_socket(int& mysocket,const char* hostname,int Portnum)
int attempts = 0;
long wait = 1;
int fl;
int connect_errno;
do
{ fl=1;
while (fl==1 || errno==EINPROGRESS)
{
mysocket = socket(AF_INET, SOCK_STREAM, 0);
if (mysocket < 0)
error("set_up_socket:socket");
{
mysocket = socket(AF_INET, SOCK_STREAM, 0);
if (mysocket < 0)
error("set_up_socket:socket");
fl=connect(mysocket, addr, len);
attempts++;
if (fl != 0)
{
close(mysocket);
usleep(wait *= 2);
fl = connect(mysocket, addr, len);
connect_errno = errno;
attempts++;
if (fl != 0)
{
close(mysocket);
usleep(wait *= 2);
#ifdef DEBUG_NETWORKING
string msg = "Connecting to " + string(hostname) + ":" +
to_string(Portnum) + " failed";
perror(msg.c_str());
string msg = "Connecting to " + string(hostname) + ":" +
to_string(Portnum) + " failed";
errno = connect_errno;
perror(msg.c_str());
#endif
}
}
}
errno = connect_errno;
}
while (fl == -1 && (errno == ECONNREFUSED || errno == ETIMEDOUT)
&& timer.elapsed() < 60);
while (fl == -1
&& (errno == ECONNREFUSED || errno == ETIMEDOUT || errno == EINPROGRESS)
&& timer.elapsed() < 60);
if (fl < 0)
{
cout << attempts << " attempts to " << hostname << ":" << Portnum
<< endl;
error("set_up_socket:connect:", hostname);
throw runtime_error(
string() + "cannot connect from " + my_name + " to " + hostname + ":"
+ to_string(Portnum) + " after " + to_string(attempts)
+ " attempts in one minute because " + strerror(connect_errno));
}
freeaddrinfo(ai);
@@ -127,9 +119,6 @@ void set_up_client_socket(int& mysocket,const char* hostname,int Portnum)
fl= setsockopt(mysocket, IPPROTO_TCP, TCP_NODELAY, (char*)&one, sizeof(int));
if (fl<0) { error("set_up_socket:setsockopt"); }
fl=setsockopt(mysocket, SOL_SOCKET, SO_REUSEADDR, (char*)&one, sizeof(int));
if (fl<0) { error("set_up_socket:setsockopt"); }
#ifdef __APPLE__
int flags = fcntl(mysocket, F_GETFL, 0);
fl = fcntl(mysocket, F_SETFL, O_NONBLOCK | flags);
@@ -147,5 +136,3 @@ void close_client_socket(int socket)
error(tmp);
}
}
unsigned long long sent_amount = 0, sent_counter = 0;

View File

@@ -24,7 +24,6 @@
using namespace std;
void error(const char *str1,const char *str2);
void error(const char *str);
void set_up_client_socket(int& mysocket,const char* hostname,int Portnum);
@@ -42,9 +41,6 @@ template<class T>
void receive(T socket, octet* msg, size_t len);
extern unsigned long long sent_amount, sent_counter;
inline size_t send_non_blocking(int socket, octet* msg, size_t len)
{
int j = send(socket,msg,len,MSG_DONTWAIT);
@@ -66,9 +62,6 @@ inline void send(int socket,octet *msg,size_t len)
{
i += send_non_blocking(socket, msg + i, len - i);
}
sent_amount += len;
sent_counter++;
}
template<class T>

View File

@@ -7,10 +7,18 @@
#include <fstream>
#include <pthread.h>
#ifndef NO_AVX_OT
extern "C" {
#include "SimpleOT/ot_sender.h"
#include "SimpleOT/ot_receiver.h"
}
#endif
#include "ECDSA/P256Element.h"
#ifdef USE_RISTRETTO
#include "ECDSA/CurveElement.h"
#endif
using namespace std;
@@ -70,7 +78,57 @@ void send_if_ot_receiver(TwoPartyPlayer* P, vector<octetStream>& os, OT_ROLE rol
void BaseOT::exec_base(bool new_receiver_inputs)
{
if (not cpu_has_avx())
#ifdef NO_AVX_OT
#ifdef USE_RISTRETTO
typedef CurveElement Element;
#else
typedef P256Element Element;
#endif
Element::init();
vector<Element::Scalar> as, bs;
vector<Element> As;
SeededPRNG G;
vector<octetStream> os(2);
if (ot_role & SENDER)
for (int i = 0; i < nOT; i++)
{
as.push_back(G.get<Element::Scalar>());
As.push_back(as.back());
As.back().pack(os[0]);
}
send_if_ot_sender(P, os, ot_role);
os[0].reset_write_head();
if (ot_role & RECEIVER)
for (int i = 0; i < nOT; i++)
{
if (new_receiver_inputs)
receiver_inputs[i] = G.get_bit();
auto b = G.get<Element::Scalar>();
Element B = b;
auto A = os[1].get<Element>();
if (receiver_inputs[i])
B += A;
B.pack(os[0]);
receiver_outputs[i] = (A * b).hash(AES_BLK_SIZE);
}
send_if_ot_receiver(P, os, ot_role);
if (ot_role & SENDER)
for (int i = 0; i < nOT; i++)
{
auto B = os[1].get<Element>();
sender_inputs.at(i).at(0) = (B * as[i]).hash(AES_BLK_SIZE);
sender_inputs.at(i).at(1) = ((B - As[i]) * as[i]).hash(AES_BLK_SIZE);
}
#else
if (not cpu_has_avx(true))
throw runtime_error("SimpleOT needs AVX support");
int i, j, k;
@@ -179,6 +237,7 @@ void BaseOT::exec_base(bool new_receiver_inputs)
printf("\n");
#endif
}
#endif
for (int i = 0; i < nOT; i++)
{

View File

@@ -6,9 +6,9 @@
#ifndef OT_BITMATRIX_H_
#define OT_BITMATRIX_H_
#include "Tools/intrinsics.h"
#include <vector>
#include <emmintrin.h>
#include <immintrin.h>
#include <iostream>
using namespace std;

View File

@@ -5,8 +5,7 @@
#include "Math/gf2n.h"
#include "Tools/aes.h"
#include "Tools/MMO.h"
#include <wmmintrin.h>
#include <emmintrin.h>
#include "Tools/intrinsics.h"
OTExtension::OTExtension(const BaseOT& baseOT, TwoPartyPlayer* player,

View File

@@ -3,13 +3,12 @@
*
*/
#include <smmintrin.h>
#include <immintrin.h>
#include <mpirxx.h>
#include "BitMatrix.h"
#include "Tools/random.h"
#include "Tools/BitVector.h"
#include "Tools/intrinsics.h"
#include "Math/Square.h"
union matrix16x8

View File

@@ -287,7 +287,6 @@ enum
// Register types
enum RegType {
MODP,
INT,
SBIT,
CBIT,

View File

@@ -342,7 +342,6 @@ void BaseInstruction::parse_operands(istream& s, int pos, int file_pos)
// write to external client, input is : opcode num_args, client_id, message_type, var1, var2 ...
case WRITESOCKETC:
case WRITESOCKETS:
case WRITESOCKETSHARE:
case WRITESOCKETINT:
num_var_args = get_int(s) - 2;
@@ -350,6 +349,8 @@ void BaseInstruction::parse_operands(istream& s, int pos, int file_pos)
r[1] = get_int(s);
get_vector(num_var_args, start, s);
break;
case WRITESOCKETS:
throw runtime_error("sending MACs to client not supported any more");
case CONNECTIPV4:
throw runtime_error("parties as clients not supported any more");
case READCLIENTPUBLICKEY:
@@ -590,6 +591,7 @@ int BaseInstruction::get_reg_type() const
case SHLCI:
case SHRCI:
case CONVINT:
case PUBINPUT:
return CINT;
default:
if (is_gf2n_instruction())
@@ -1145,29 +1147,17 @@ inline void Instruction::execute(Processor<sint, sgf2n>& Proc) const
// read shares and MAC shares
Proc.read_socket_private(Proc.read_Ci(r[0]), start, true);
break;
case GREADSOCKETS:
//Proc.get_S2_ref(r[0]).get_share().pack(socket_octetstream);
//Proc.get_S2_ref(r[0]).get_mac().pack(socket_octetstream);
break;
case WRITESOCKETINT:
Proc.write_socket(INT, CLEAR, false, Proc.read_Ci(r[0]), r[1], start);
Proc.write_socket(INT, Proc.read_Ci(r[0]), r[1], start);
break;
case WRITESOCKETC:
Proc.write_socket(MODP, CLEAR, false, Proc.read_Ci(r[0]), r[1], start);
break;
case WRITESOCKETS:
// Send shares + MACs
Proc.write_socket(MODP, SECRET, true, Proc.read_Ci(r[0]), r[1], start);
Proc.write_socket(CINT, Proc.read_Ci(r[0]), r[1], start);
break;
case WRITESOCKETSHARE:
// Send only shares, no MACs
// N.B. doesn't make sense to have a corresponding read instruction for this
Proc.write_socket(MODP, SECRET, false, Proc.read_Ci(r[0]), r[1], start);
Proc.write_socket(SINT, Proc.read_Ci(r[0]), r[1], start);
break;
/*case GWRITESOCKETS:
Proc.get_S2_ref(r[0]).get_share().pack(socket_octetstream);
Proc.get_S2_ref(r[0]).get_mac().pack(socket_octetstream);
break;*/
case WRITEFILESHARE:
// Write shares to file system
Proc.write_shares_to_file(start);

View File

@@ -45,6 +45,8 @@ class Machine : public BaseMachine
// Keep record of used offline data
DataPositions pos;
Player* P;
void load_program(const string& threadname, const string& filename);
public:
@@ -75,6 +77,7 @@ class Machine : public BaseMachine
const string& memtype, int lg2, bool direct, int opening_sum,
bool receive_threads, int max_broadcast, bool use_encryption, bool live_prep,
OnlineOptions opts);
~Machine();
const Names& get_N() { return N; }

View File

@@ -49,7 +49,6 @@ Machine<sint, sgf2n>::Machine(int my_number, Names& playerNames,
// make directory for outputs if necessary
mkdir_p(PREP_DIR);
Player* P;
if (use_encryption)
P = new CryptoPlayer(N, 0xF00);
else
@@ -103,8 +102,6 @@ Machine<sint, sgf2n>::Machine(int my_number, Names& playerNames,
ot_setups.push_back({ *P, true });
}
delete P;
/* Set up the threads */
tinfo.resize(nthreads);
threads.resize(nthreads);
@@ -131,6 +128,12 @@ Machine<sint, sgf2n>::Machine(int my_number, Names& playerNames,
}
}
// Destructor: frees the Player object that the constructor allocates with
// new and keeps in the member P.
template<class sint, class sgf2n>
Machine<sint, sgf2n>::~Machine()
{
delete P;
}
template<class sint, class sgf2n>
void Machine<sint, sgf2n>::load_program(const string& threadname,
const string& filename)
@@ -318,7 +321,7 @@ void Machine<sint, sgf2n>::run()
print_timers();
cerr << "Data sent = " << data_sent / 1e6 << " MB" << endl;
PlainPlayer P(N, 0xFF00);
auto& P = *this->P;
Bundle<octetStream> bundle(P);
bundle.mine.store(data_sent.load());
P.Broadcast_Receive_no_stats(bundle);

View File

@@ -34,6 +34,8 @@ template<class W>
template<class T, class U>
int OfflineMachine<W>::run()
{
T::clear::init_default(this->online_opts.prime_length());
U::clear::init_field(U::clear::default_degree());
T::bit_type::mac_key_type::init_field();
auto binary_mac_key = read_generate_write_mac_key<typename T::bit_type>(P);
GC::ShareThread<typename T::bit_type> thread(playerNames,
@@ -52,7 +54,6 @@ template<class W>
template<class T>
void OfflineMachine<W>::generate()
{
T::clear::init_default(this->online_opts.prime_length());
T::clear::next::template init<typename T::clear>(false);
T::clear::template write_setup<T>(P.num_players());
auto mac_key = read_generate_write_mac_key<T>(P);

View File

@@ -200,12 +200,10 @@ void OnlineMachine::start_networking()
} else {
if (not opt.get("-ext-server")->isSet)
{
if (my_port != Names::DEFAULT_PORT)
throw runtime_error("cannot set port number when not using Server.x");
if (nplayers == 0)
opt.get("-N")->getInt(nplayers);
server = Server::start_networking(playerNames, mynum, nplayers,
hostname, pnbase);
hostname, pnbase, my_port);
}
else
{

View File

@@ -224,7 +224,7 @@ class Processor : public ArithmeticProcessor
// Access to external client sockets for reading clear/shared data
void read_socket_ints(int client_id, const vector<int>& registers);
void write_socket(const RegType reg_type, const SecrecyType secrecy_type, const bool send_macs,
void write_socket(const RegType reg_type,
int socket_id, int message_type, const vector<int>& registers);
void read_socket_vector(int client_id, const vector<int>& registers);

View File

@@ -241,7 +241,7 @@ void Processor<sint, sgf2n>::split(const Instruction& instruction)
// If message_type is > 0, send message_type in bytes 0 - 3, to allow an external client to
// determine the data structure being sent in a message.
template<class sint, class sgf2n>
void Processor<sint, sgf2n>::write_socket(const RegType reg_type, const SecrecyType secrecy_type, const bool send_macs,
void Processor<sint, sgf2n>::write_socket(const RegType reg_type,
int socket_id, int message_type, const vector<int>& registers)
{
int m = registers.size();
@@ -254,26 +254,23 @@ void Processor<sint, sgf2n>::write_socket(const RegType reg_type, const SecrecyT
for (int i = 0; i < m; i++)
{
if (reg_type == MODP && secrecy_type == SECRET) {
// Send vector of secret shares and optionally macs
if (send_macs)
get_Sp_ref(registers[i]).pack(socket_stream);
else
get_Sp_ref(registers[i]).pack(socket_stream,
sint::get_rec_factor(P.my_num(), P.num_players()));
if (reg_type == SINT) {
// Send vector of secret shares
get_Sp_ref(registers[i]).pack(socket_stream,
sint::get_rec_factor(P.my_num(), P.num_players()));
}
else if (reg_type == MODP && secrecy_type == CLEAR) {
else if (reg_type == CINT) {
// Send vector of clear public field elements
get_Cp_ref(registers[i]).pack(socket_stream);
}
else if (reg_type == INT && secrecy_type == CLEAR) {
else if (reg_type == INT) {
// Send vector of 32-bit clear ints
socket_stream.store((int&)get_Ci_ref(registers[i]));
}
else {
stringstream ss;
ss << "Write socket instruction with unknown reg type " << reg_type <<
" and secrecy type " << secrecy_type << "." << endl;
"." << endl;
throw Processor_Error(ss.str());
}
}

View File

@@ -1,4 +1,11 @@
import ml
import ml, sys
if len(program.args) < 2:
print("You need to identify a concrete network such as 'v1_0.25_128'.",
file=sys.stderr)
print("Refer to https://github.com/anderspkd/SecureQ8 for scripts to run "
"this benchmark.", file=sys.stderr)
exit(1)
network = program.args[1]

View File

@@ -3,7 +3,7 @@ prog = program
from Compiler.GC.types import *
from Compiler.GC.instructions import *
bits.unit = 128
bits.unit = 64
program.to_merge = [ldmsdi, stmsdi, ldmsd, stmsd, stmsdci, xors, andrs]
program.stop_class = type(None)
@@ -11,7 +11,7 @@ program.stop_class = type(None)
from Compiler.circuit_oram import *
from Compiler import circuit_oram
from Compiler import oram
import oram
oram.n_threads = 1
oram.n_threads_for_tree = 1

View File

@@ -58,16 +58,6 @@ public:
{
}
void add(T a, T b, int = 0, T = {})
{
*this = a + b;
}
void sub(T a, T b, int = 0, T = {})
{
*this = a - b;
}
static void split(vector<bit_type>& dest, const vector<int>& regs,
int n_bits, const This* source, int n_inputs,
GC::FakeSecret::Protocol& protocol);

View File

@@ -161,15 +161,6 @@ public:
void exchange(const Player& P);
};
template <class T>
class Passing_MAC_Check : public Direct_MAC_Check<T>
{
public:
Passing_MAC_Check(const typename T::mac_key_type::Scalar& ai);
void exchange(const Player& P);
};
enum mc_timer { SEND, RECV_ADD, BCAST, RECV_SUM, SEED, COMMIT, WAIT_SUMMER, RECV, SUM, SELECT, MAX_TIMER };

View File

@@ -390,25 +390,6 @@ void Direct_MAC_Check<T>::exchange(const Player& P)
this->CheckIfNeeded(P);
}
template<class T>
Passing_MAC_Check<T>::Passing_MAC_Check(const typename T::mac_key_type::Scalar& ai) :
Direct_MAC_Check<T>(ai)
{
}
template<class T>
void passing_add_openings(vector<T>& values, octetStream& os)
{
octetStream new_os;
for (unsigned int i=0; i<values.size(); i++)
{
T tmp;
tmp.unpack(os);
(tmp + values[i]).pack(new_os);
}
os = new_os;
}
template<class T>
void Direct_MAC_Check<T>::init_open(const Player& P, int n)
{
@@ -422,20 +403,4 @@ void Direct_MAC_Check<T>::prepare_open(const T& secret)
this->macs.push_back(secret.get_mac());
}
template<class T>
void Passing_MAC_Check<T>::exchange(const Player& P)
{
this->pre_exchange(P);
for (int i = 0; i < P.num_players() - 1; i++)
{
P.pass_around(this->os);
passing_add_openings(this->values, this->os);
}
for (auto& x : this->values)
x.unpack(this->os);
this->AddToValues(this->values);
this->popen_cnt += this->values.size();
this->CheckIfNeeded(P);
}
#endif

View File

@@ -125,7 +125,6 @@ template<class U>
void ShuffleSacrifice::shuffle(vector<U>& check_triples, Player& P)
{
int buffer_size = check_triples.size();
assert(buffer_size >= minimum_n_inputs());
// shuffle
GlobalPRNG G(P);
@@ -137,13 +136,24 @@ void ShuffleSacrifice::shuffle(vector<U>& check_triples, Player& P)
}
}
// Default constructor; relies on the default constructor of the
// ShuffleSacrifice base class.
template<class T>
TripleShuffleSacrifice<T>::TripleShuffleSacrifice()
{
}
// Construct with explicit B and C, which are passed on unchanged to the
// ShuffleSacrifice base class.
template<class T>
TripleShuffleSacrifice<T>::TripleShuffleSacrifice(int B, int C) :
ShuffleSacrifice(B, C)
{
}
template<class T>
void TripleShuffleSacrifice<T>::triple_sacrifice(vector<array<T, 3>>& triples,
vector<array<T, 3>>& check_triples, Player& P,
typename T::MAC_Check& MC, ThreadQueues* queues)
{
int buffer_size = check_triples.size();
int N = (buffer_size - C) / B;
size_t N = (buffer_size - C) / B;
shuffle(check_triples, P);
@@ -161,7 +171,9 @@ void TripleShuffleSacrifice<T>::triple_sacrifice(vector<array<T, 3>>& triples,
if (typename T::clear(opened[3 * i] * opened[3 * i + 1]) != opened[3 * i + 2])
throw Offline_Check_Error("shuffle opening");
triples.resize(N);
// triples might be same as check_triples
if (triples.size() < N)
triples.resize(N);
if (queues)
{
@@ -172,6 +184,8 @@ void TripleShuffleSacrifice<T>::triple_sacrifice(vector<array<T, 3>>& triples,
}
else
triple_sacrifice(triples, check_triples, P, MC, 0, N);
triples.resize(N);
}
template<class T>
@@ -188,6 +202,7 @@ void TripleShuffleSacrifice<T>::triple_sacrifice(vector<array<T, 3>>& triples,
int N = buffer_size / B;
int size = end - begin;
masked.reserve(2 * size);
assert(size_t(end * B) <= check_triples.size());
for (int i = begin; i < end; i++)
{
T& a = check_triples[i][0];

View File

@@ -326,7 +326,8 @@ void buffer_bits_spec(ReplicatedPrep<T<gfp_<X, L>>>& prep, vector<T<gfp_<X, L>>>
typename T<gfp_<X, L>>::Protocol& prot)
{
(void) bits, (void) prot;
if (prot.get_n_relevant_players() > 10)
if (prot.get_n_relevant_players() > 10
or OnlineOptions::singleton.bits_from_squares)
buffer_bits_from_squares(prep);
else
prep.ReplicatedRingPrep<T<gfp_<X, L>>>::buffer_bits();

View File

@@ -101,15 +101,6 @@ public:
T::assign(buffer);
}
void add(const ShamirShare& x, const ShamirShare& y)
{
*this = x + y;
}
void sub(const ShamirShare& x, const ShamirShare& y)
{
*this = x - y;
}
void add(const ShamirShare& S, const clear aa, int my_num,
const T& alphai)
{

View File

@@ -18,7 +18,6 @@ template<class T> class Share;
template<class T> class MAC_Check_;
template<class T> class Direct_MAC_Check;
template<class T> class Passing_MAC_Check;
template<class T> class MascotMultiplier;
template<class T> class MascotFieldPrep;
template<class T> class MascotTripleGenerator;

View File

@@ -27,6 +27,7 @@ public:
const int C;
ShuffleSacrifice();
ShuffleSacrifice(int B, int C);
int minimum_n_inputs(int n_outputs = 1)
{
@@ -56,6 +57,9 @@ template<class T>
class TripleShuffleSacrifice : public ShuffleSacrifice
{
public:
TripleShuffleSacrifice();
TripleShuffleSacrifice(int B, int C);
void triple_sacrifice(vector<array<T, 3>>& triples,
vector<array<T, 3>>& check_triples, Player& P,
typename T::MAC_Check& MC, ThreadQueues* queues = 0);

View File

@@ -19,6 +19,12 @@ ShuffleSacrifice::ShuffleSacrifice() :
{
}
// Construct with caller-supplied parameters: the arguments B and C are
// stored in the members of the same name.
inline
ShuffleSacrifice::ShuffleSacrifice(int B, int C) :
B(B), C(C)
{
}
template<class T>
void TripleShuffleSacrifice<T>::triple_combine(vector<array<T, 3> >& triples,
vector<array<T, 3> >& to_combine, Player& P,

103
README.md
View File

@@ -39,7 +39,7 @@ parties and malicious security.
On Linux, this requires a working toolchain and [all
requirements](#requirements). On Ubuntu, the following might suffice:
```
apt-get install automake build-essential git libboost-dev libboost-thread-dev libsodium-dev libssl-dev libtool m4 python texinfo yasm
apt-get install automake build-essential git libboost-dev libboost-thread-dev libntl-dev libsodium-dev libssl-dev libtool m4 python3 texinfo yasm
```
On MacOS, this requires [brew](https://brew.sh) to be installed,
which will be used for all dependencies.
@@ -77,13 +77,74 @@ The following table lists all protocols that are fully supported.
| Malicious, dishonest majority | [MASCOT / LowGear / HighGear](#secret-sharing) | [SPDZ2k](#secret-sharing) | [Tiny / Tinier](#secret-sharing) | [BMR](#bmr) |
| Covert, dishonest majority | [CowGear / ChaiGear](#secret-sharing) | N/A | N/A | N/A |
| Semi-honest, dishonest majority | [Semi / Hemi / Soho](#secret-sharing) | [Semi2k](#secret-sharing) | [SemiBin](#secret-sharing) | [Yao's GC](#yaos-garbled-circuits) / [BMR](#bmr) |
| Malicious, honest majority | [Shamir / Rep3 / PS / SY](#honest-majority) | [Brain / Rep[34] / PS / SY](#honest-majority) | [Rep3 / CCD](#honest-majority) | [BMR](#bmr) |
| Malicious, honest majority | [Shamir / Rep3 / PS / SY](#honest-majority) | [Brain / Rep[34] / PS / SY](#honest-majority) | [Rep3 / CCD / PS](#honest-majority) | [BMR](#bmr) |
| Semi-honest, honest majority | [Shamir / Rep3](#honest-majority) | [Rep3](#honest-majority) | [Rep3 / CCD](#honest-majority) | [BMR](#bmr) |
See [this paper](https://eprint.iacr.org/2020/300) for an explanation
of the various security models and high-level introduction to
multi-party computation.
##### Finding the most efficient protocol
Lower security requirements generally allow for more efficient
protocols. Within the same security model (line in the table above),
there are a few things to consider:
- Computation domain: Arithmetic protocols (modulo prime or power of
two) are preferable for many applications because they offer integer
addition and multiplication at low cost. However, binary circuits
might be a better option if there is very little integer
computation. [See below](#finding-the-most-efficient-variant) to
find the most efficient mixed-circuit variant. Furthermore, local
computation modulo a power of two is cheaper, but MP-SPDZ does not
offer this domain with homomorphic encryption.
- Secret sharing vs garbled circuits: Computation using secret sharing
requires a number of communication rounds that grows depending on
the computation, which is not the case for garbled
circuits. However, the cost of integer computation as a binary
circuit often offsets this. MP-SPDZ only offers garbled circuits
with binary computation.
- Underlying technology for dishonest majority: While secret sharing
alone suffices for honest-majority computation, dishonest majority
requires either homomorphic encryption (HE) or oblivious transfer
(OT). The two options offer a computation-communication trade-off:
While OT is easier to compute, HE requires less
communication. Furthermore, the latter requires a certain amount of
batching to be efficient, which makes OT preferable for smaller
tasks.
- Malicious, honest-majority three-party computation: A number of
protocols are available for this setting, but SY/SPDZ-wise is the
most efficient one for a number of reasons: It requires the lowest
communication, and it is the only one offering constant-communication
dot products.
- Minor variants: Some command-line options change aspects of the
protocols such as:
- `--bucket-size`: In some malicious binary computation and
malicious edaBit generation, a smaller bucket size allows
preprocessing in smaller batches at a higher asymptotic cost.
- `--batch-size`: Preprocessing in smaller batches avoids generating
too much but larger batches save communication rounds.
- `--direct`: In dishonest-majority protocols, direct communication
instead of star-shaped saves communication rounds at the expense
of a quadratic amount. This might be beneficial with a small
number of parties.
- `--bits-from-squares`: In some protocols computing modulo a prime
(Shamir, Rep3, SPDZ-wise), this switches from generating random
bits via XOR of parties' inputs to generation using the root of a
random square.
- `--top-gear`: In protocols with malicious security using
homomorphic encryption, this reduces the memory usage and batch
size for preprocessing.
#### Paper and Citation
The design of MP-SPDZ is described in [this
@@ -151,13 +212,16 @@ phase outputs the amount of offline material required, which allows to
compute the preprocessing time for a particular computation.
#### Requirements
- GCC 5 or later (tested with up to 10) or LLVM/clang 5 or later (tested with up to 11). We recommend clang because it performs better.
- GCC 5 or later (tested with up to 10) or LLVM/clang 5 or later
(only x86; tested with up to 11). For x86, we recommend clang
because it performs better.
- MPIR library, compiled with C++ support (use flag `--enable-cxx` when running configure). You can use `make -j8 tldr` to install it locally.
- libsodium library, tested against 1.0.16
- OpenSSL, tested against 1.1.1
- Boost.Asio with SSL support (`libboost-dev` on Ubuntu), tested against 1.65
- Boost.Thread for BMR (`libboost-thread-dev` on Ubuntu), tested against 1.65
- 64-bit CPU
- x86 or ARM 64-bit CPU (the latter tested with AWS Graviton)
- Python 3.5 or later
- NTL library for homomorphic encryption (optional; tested with NTL 10.5)
- If using macOS, Sierra or later
@@ -168,13 +232,14 @@ compute the preprocessing time for a particular computation.
- By default, the binaries are optimized for the CPU you are
compiling on.
For all optimizations, a CPU supporting AES-NI, PCLMUL, AVX2, BMI2, ADX is
For all optimizations on x86, a CPU supporting AES-NI, PCLMUL, AVX2, BMI2, ADX is
required. This includes mainstream processors released 2014 or later.
If you intend to run on a different CPU than compiling, you might
need to change the `ARCH` variable in `CONFIG` or `CONFIG.mine` to
`-march=<cpu>`. See the [GCC
documentation](https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html)
for the possible options.
for the possible options. To run OT-based protocols on x86 without AVX,
add `AVX_OT = 0` in addition.
- To benchmark online-only protocols or Overdrive offline phases, add the following line at the top: `MY_CFLAGS = -DINSECURE`
- `PREP_DIR` should point to a local, unversioned directory to store preprocessing data (the default is `Player-Data` in the current directory).
- For homomorphic encryption, set `USE_NTL = 1`.
@@ -299,6 +364,19 @@ compiler where `n` is the number of parties for the standard variant
and 2 for the special
variant by Mohassel and Rindal (available in Rep3 only).
##### Finding the most efficient variant
Where available, local share conversion is likely the most efficient
variant. Protocols based on Shamir secret sharing are unlikely to
benefit from mixed-circuit computation because they use an extension
field for binary computation. Otherwise, edaBits likely offer an
asymptotic benefit. However, malicious protocols by default generate
large batches of edaBits (more than one million at once), which is
only worthwhile for accordingly large computation. For smaller
computation, try running the virtual machines with `-B 4` or `-B 5`,
which reduces the batch size to ~10,000 and ~1,000, respectively, at a
higher asymptotic cost.
#### Bristol Fashion circuits
Bristol Fashion is the name of a description format of binary circuits
@@ -386,7 +464,8 @@ This runs the compiled bytecode in cleartext computation.
Some full implementations require oblivious transfer, which is
implemented as OT extension based on
https://github.com/mkskeller/SimpleOT.
https://github.com/mkskeller/SimpleOT or OpenSSL (activate the
latter with `AVX_OT = 0` in `CONFIG` or `CONFIG.mine`).
### Secret sharing
@@ -524,6 +603,7 @@ The following table shows all programs for honest-majority computation:
| `rep4-ring-party.x` | Replicated | Mod 2^k | Y | 4 | `rep4-ring.sh` |
| `replicated-bin-party.x` | Replicated | Binary | N | 3 | `replicated.sh` |
| `malicious-rep-bin-party.x` | Replicated | Binary | Y | 3 | `mal-rep-bin.sh` |
| `ps-rep-bin-party.x` | Replicated | Binary | Y | 3 | `ps-rep-bin.sh` |
| `replicated-field-party.x` | Replicated | Mod prime | N | 3 | `rep-field.sh` |
| `ps-rep-field-party.x` | Replicated | Mod prime | Y | 3 | `ps-rep-field.sh` |
| `sy-rep-field-party.x` | SPDZ-wise replicated | Mod prime | Y | 3 | `sy-rep-field.sh` |
@@ -537,7 +617,7 @@ The following table shows all programs for honest-majority computation:
We use the "generate random triple optimistically/sacrifice/Beaver"
methodology described by [Lindell and
Nof](https://eprint.iacr.org/2017/816) to achieve malicious
security with plain replicated secret sharing,
security with plain arithmetic replicated secret sharing,
except for the "PS" (post-sacrifice) protocols where the
actual multiplication is executed optimistically and checked later as
also described by Lindell and Nof.
@@ -563,6 +643,13 @@ secret value and information-theoretic tag similar to SPDZ but not
with additive secret sharing, hence the name.
Rep4 refers to the four-party protocol by [Dalskov et
al.](https://eprint.iacr.org/2020/1330).
`malicious-rep-bin-party.x` is based on cut-and-choose triple
generation by [Furukawa et al.](https://eprint.iacr.org/2016/944) but
using Beaver multiplication instead of their post-sacrifice
approach. `ps-rep-bin-party.x` is based on the post-sacrifice approach
by [Araki et
al.](https://www.ieee-security.org/TC/SP2017/papers/96.pdf) but
without using their cache optimization.
All protocols in this section require encrypted channels because the
information received by the honest majority suffices to reconstruct

View File

@@ -35,5 +35,7 @@ done
$prefix ./bmr-program-tparty.x $prog $netmap 2>&1 &> bmr-log/t &
for i in $(seq $[n_players-1]); do
$prefix ./bmr-program-party.x $i $prog $netmap $threshold 2>&1 &> bmr-log/$i &
id=$!
done
$prefix ./bmr-program-party.x $n_players $prog $netmap $threshold 2>&1 | tee bmr-log/$n_players
wait $id

View File

@@ -3,7 +3,7 @@
HERE=$(cd `dirname $0`; pwd)
SPDZROOT=$HERE/..
export PLAYERS=3
export PLAYERS=${PLAYERS:-3}
. $HERE/run-common.sh

View File

@@ -3,7 +3,7 @@
HERE=$(cd `dirname $0`; pwd)
SPDZROOT=$HERE/..
export PLAYERS=3
export PLAYERS=${PLAYERS:-3}
. $HERE/run-common.sh

10
Scripts/ps-rep-bin.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Launch ps-rep-bin-party.x through run_player from run-common.sh,
# with PLAYERS fixed to 3. All command-line arguments are forwarded.
# Quoting keeps the script working when its path or an argument
# contains whitespace.
HERE=$(cd "$(dirname "$0")" && pwd)
SPDZROOT=$HERE/..
export PLAYERS=3
. "$HERE/run-common.sh"
run_player ps-rep-bin-party.x "$@" || exit 1

View File

@@ -7,6 +7,8 @@ while getopts XYC opt; do
;;
Y) dabit=2
;;
C) cont=1
;;
esac
done
@@ -31,7 +33,7 @@ function test_vm
echo == Party $i
cat logs/tutorial-$i
done
exit 1
test -z $cont && exit 1
fi
}
@@ -88,7 +90,7 @@ fi
./compile.py -B 16 $compile_opts tutorial
for i in replicated mal-rep-bin semi-bin ccd mal-ccd; do
for i in replicated mal-rep-bin ps-rep-bin semi-bin ccd mal-ccd; do
test_vm $i $run_opts
done

View File

@@ -41,7 +41,7 @@ BitVector BitVector::operator &(const BitVector& other) const
bool BitVector::parity() const
{
#if defined(__SSE4_2__) or not defined(__clang__)
#if (defined(__SSE4_2__) or not defined(__clang__)) and defined(__x86_64__)
bool res = 0;
for (size_t i = 0; i < size_bytes() / 8; i++)
res ^= _popcnt64(((word*)bytes)[i]) & 1;
@@ -49,7 +49,17 @@ bool BitVector::parity() const
res ^= _popcnt32(bytes[i]) & 1;
return res;
#else
throw runtime_error("need to compile with SSE4.2 support or GCC");
// Portable fallback without popcount intrinsics: fold every chunk onto
// its lowest bit with shifted XORs and accumulate that bit.
bool res = 0;
// Full 64-bit words.
for (size_t i = 0; i < size_bytes() / 8; i++)
{
    word x = ((word*)bytes)[i];
    for (int shift = 32; shift > 0; shift >>= 1)
        x ^= (x >> shift);
    res ^= (x & 1);
}
// Trailing bytes that do not fill a whole word. The loop variable is a
// BYTE index, so the byte itself must be folded; indexing the vector
// with operator[] would read the single bit at position i instead.
for (size_t i = size_bytes() / 8 * 8; i < size_bytes(); i++)
{
    unsigned x = bytes[i] & 0xff;
    x ^= (x >> 4);
    x ^= (x >> 2);
    x ^= (x >> 1);
    res ^= (x & 1);
}
return res;
#endif
}
@@ -131,12 +141,19 @@ void BitVector::input(istream& s,bool human)
void BitVector::pack(octetStream& o) const
{
o.store(nbytes);
o.store_int(nbits, 8);
o.append((octet*)bytes, nbytes);
}
void BitVector::unpack(octetStream& o)
{
o.get(nbytes);
resize(o.get_int(8));
o.consume((octet*)bytes, nbytes);
}
// Overwrite this vector with the raw payload of an octetStream:
// the vector is resized to eight bits per stream byte and the bytes
// are copied over verbatim.
BitVector& BitVector::operator =(const octetStream other)
{
    const auto n_bits = other.get_length() * 8;
    resize(n_bits);
    // presumably resize() has set nbytes to the stream length -- TODO confirm
    memcpy(bytes, other.get_data(), nbytes);
    return *this;
}

View File

@@ -7,7 +7,6 @@
#include <vector>
using namespace std;
#include <stdlib.h>
#include <pmmintrin.h>
#include <assert.h>
#include "Tools/Exceptions.h"
@@ -15,6 +14,7 @@ using namespace std;
// just for util functions
#include "Math/gf2nlong.h"
#include "Math/FixedVec.h"
#include "Tools/intrinsics.h"
class PRNG;
class octetStream;
@@ -137,6 +137,8 @@ class BitVector
return *this;
}
BitVector& operator=(const octetStream other);
void swap(BitVector& other)
{
std::swap(nbits, other.nbits);
@@ -156,7 +158,7 @@ class BitVector
void operator=(const Access& other) { *this = other.get(); }
void operator^=(const Access& other) { *this = get() ^ other.get(); }
bool operator==(const Access& other) const { return get() == other.get(); }
bool operator==(bool b) const { return get() == b; }
operator bool() const { return get(); }
};
bool operator[](int i) const { return get_bit(i); }
@@ -242,6 +244,11 @@ class BitVector
return true;
}
// Equality comparison; simply forwards to equals(other).
// NOTE(review): this member is not declared const, so it cannot be
// called on a const BitVector. Consider adding const if equals() is
// const (its declaration is not visible here).
bool operator==(const BitVector& other)
{
return equals(other);
}
void append(const BitVector& other, size_t length);
void randomize(PRNG& G);

328
Tools/aes-arm.h Normal file
View File

@@ -0,0 +1,328 @@
// This file is reduced to functionality necessary for AES in order to avoid
// conflicts with simde.
/*
* sse2neon is freely redistributable under the MIT License.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if defined(__GNUC__) || defined(__clang__)
#pragma push_macro("FORCE_INLINE")
#pragma push_macro("ALIGN_STRUCT")
#define FORCE_INLINE static inline __attribute__((always_inline))
#define ALIGN_STRUCT(x) __attribute__((aligned(x)))
#else
#error "Macro name collisions may happen with unsupported compiler."
#ifdef FORCE_INLINE
#undef FORCE_INLINE
#endif
#define FORCE_INLINE static inline
#ifndef ALIGN_STRUCT
#define ALIGN_STRUCT(x) __declspec(align(x))
#endif
#endif
#define vreinterpretq_m128i_u8(x) vreinterpretq_s64_u8(x)
#define vreinterpretq_m128i_u32(x) vreinterpretq_s64_u32(x)
#define vreinterpretq_u8_m128i(x) vreinterpretq_u8_s64(x)
// A struct is defined in this header file called 'SIMDVec' which can be used
// by applications which attempt to access the contents of an _m128 struct
// directly. It is important to note that accessing the __m128 struct directly
// is considered bad coding practice by Microsoft: @see:
// https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx
//
// However, some legacy source code may try to access the contents of an __m128
// struct directly so the developer can use the SIMDVec as an alias for it. Any
// casting must be done manually by the developer, as you cannot cast or
// otherwise alias the base NEON data type for intrinsic operations.
//
// union intended to allow direct access to an __m128 variable using the names
// that the MSVC compiler provides. This union should really only be used when
// trying to access the members of the vector as integer values. GCC/clang
// allow native access to the float members through a simple array access
// operator (in C since 4.6, in C++ since 4.8).
//
// Ideally direct accesses to SIMD vectors should not be used since it can cause
// a performance hit. If it really is needed however, the original __m128
// variable can be aliased with a pointer to this union and used to access
// individual components. The use of this union should be hidden behind a macro
// that is used throughout the codebase to access the members instead of always
// declaring this type of variable.
// 16-byte union, declared with ALIGN_STRUCT(16), whose members all overlay
// the same storage. Each member views those 16 bytes as an array of a
// different element type.
typedef union ALIGN_STRUCT(16) SIMDVec {
float m128_f32[4]; // as floats - DON'T USE. Added for convenience.
int8_t m128_i8[16]; // as signed 8-bit integers.
int16_t m128_i16[8]; // as signed 16-bit integers.
int32_t m128_i32[4]; // as signed 32-bit integers.
int64_t m128_i64[2]; // as signed 64-bit integers.
uint8_t m128_u8[16]; // as unsigned 8-bit integers.
uint16_t m128_u16[8]; // as unsigned 16-bit integers.
uint32_t m128_u32[4]; // as unsigned 32-bit integers.
uint64_t m128_u64[2]; // as unsigned 64-bit integers.
} SIMDVec;
// casting using SIMDVec
#define vreinterpretq_nth_u64_m128i(x, n) (((SIMDVec *) &x)->m128_u64[n])
#define vreinterpretq_nth_u32_m128i(x, n) (((SIMDVec *) &x)->m128_u32[n])
#define vreinterpretq_nth_u8_m128i(x, n) (((SIMDVec *) &x)->m128_u8[n])
/* Backwards compatibility for compilers with lack of specific type support */
// Older gcc does not define vld1q_u8_x4 type
#if defined(__GNUC__) && !defined(__clang__) && \
((__GNUC__ == 10 && (__GNUC_MINOR__ <= 1)) || \
(__GNUC__ == 9 && (__GNUC_MINOR__ <= 3)) || \
(__GNUC__ == 8 && (__GNUC_MINOR__ <= 4)) || __GNUC__ <= 7)
// Fallback for compilers without vld1q_u8_x4: load 64 consecutive bytes
// starting at p as four separate 16-byte vectors.
FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p)
{
    uint8x16x4_t quad;
    for (int part = 0; part < 4; part++)
        quad.val[part] = vld1q_u8(p + 16 * part);
    return quad;
}
#else
// Wraps vld1q_u8_x4 so that callers can use the same name as the
// hand-written fallback in the other branch of this #if.
FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p)
{
return vld1q_u8_x4(p);
}
#endif
#if !defined(__ARM_FEATURE_CRYPTO)
/* clang-format off */
#define SSE2NEON_AES_DATA(w) \
{ \
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), \
w(0xc5), w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), \
w(0xab), w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), \
w(0x59), w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), \
w(0x9c), w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), \
w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), \
w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), w(0x04), \
w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), \
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), \
w(0x75), w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), \
w(0x5a), w(0xa0), w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), \
w(0xe3), w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), \
w(0x20), w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), \
w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), \
w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), \
w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8), \
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), \
w(0xf5), w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), \
w(0xf3), w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), \
w(0x97), w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), \
w(0x64), w(0x5d), w(0x19), w(0x73), w(0x60), w(0x81), w(0x4f), \
w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), \
w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), \
w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), \
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), \
w(0x79), w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), \
w(0x4e), w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), \
w(0x7a), w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), \
w(0x1c), w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), \
w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), \
w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), \
w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), \
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), \
w(0x94), w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), \
w(0x28), w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), \
w(0xe6), w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), \
w(0xb0), w(0x54), w(0xbb), w(0x16) \
}
/* clang-format on */
/* X Macro trick. See https://en.wikipedia.org/wiki/X_Macro */
#define SSE2NEON_AES_H0(x) (x)
static const uint8_t SSE2NEON_sbox[256] = SSE2NEON_AES_DATA(SSE2NEON_AES_H0);
#undef SSE2NEON_AES_H0
// In the absence of crypto extensions, implement aesenc using regular neon
// intrinsics instead. See:
// https://www.workofard.com/2017/01/accelerated-aes-for-the-arm64-linux-kernel/
// https://www.workofard.com/2017/07/ghash-for-low-end-cores/ and
// https://github.com/ColinIanKing/linux-next-mirror/blob/b5f466091e130caaf0735976648f72bd5e09aa84/crypto/aegis128-neon-inner.c#L52
// for more information Reproduced with permission of the author.
// Software replacement for the x86 aesenc intrinsic, used when
// __ARM_FEATURE_CRYPTO is not defined. Takes the current state in EncBlock
// and returns it after shift rows, sub bytes, mix columns and an XOR with
// RoundKey. Two implementations are selected at compile time: a NEON
// table-lookup version for AArch64 and a lookup-table version otherwise.
FORCE_INLINE __m128i _mm_aesenc_si128(__m128i EncBlock, __m128i RoundKey)
{
#if defined(__aarch64__)
// Byte permutation tables passed as indices to vqtbl1q_u8 below.
static const uint8_t shift_rows[] = {0x0, 0x5, 0xa, 0xf, 0x4, 0x9,
0xe, 0x3, 0x8, 0xd, 0x2, 0x7,
0xc, 0x1, 0x6, 0xb};
static const uint8_t ror32by8[] = {0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc};
uint8x16_t v;
uint8x16_t w = vreinterpretq_u8_m128i(EncBlock);
// shift rows
w = vqtbl1q_u8(w, vld1q_u8(shift_rows));
// sub bytes
// The 256-entry S-box is consumed in four 64-byte quarters (offsets 0x00,
// 0x40, 0x80, 0xc0); for each quarter the index vector is rebased by the
// quarter's offset.
v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(SSE2NEON_sbox), w);
v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x40), w - 0x40);
v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x80), w - 0x80);
v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0xc0), w - 0xc0);
// mix columns
// (v << 1) with 0x1b XORed into every byte whose top bit was set.
w = (v << 1) ^ (uint8x16_t)(((int8x16_t) v >> 7) & 0x1b);
w ^= (uint8x16_t) vrev32q_u16((uint16x8_t) v);
w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8));
// add round key
return vreinterpretq_m128i_u8(w) ^ RoundKey;
#else /* ARMv7-A NEON implementation */
// Helper macros, defined only while aes_table is built and removed again
// right after it:
//   B2W packs four bytes into a 32-bit word with b0 in the low byte;
//   F2(x) is (x << 1), XORed with 0x011b when bit 7 of x is set;
//   F3(x) is F2(x) ^ x;
//   U0..U3 combine p, F2(p) and F3(p) in four different byte orders.
#define SSE2NEON_AES_B2W(b0, b1, b2, b3) \
(((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | \
(b0))
#define SSE2NEON_AES_F2(x) ((x << 1) ^ (((x >> 7) & 1) * 0x011b /* WPOLY */))
#define SSE2NEON_AES_F3(x) (SSE2NEON_AES_F2(x) ^ x)
#define SSE2NEON_AES_U0(p) \
SSE2NEON_AES_B2W(SSE2NEON_AES_F2(p), p, p, SSE2NEON_AES_F3(p))
#define SSE2NEON_AES_U1(p) \
SSE2NEON_AES_B2W(SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p, p)
#define SSE2NEON_AES_U2(p) \
SSE2NEON_AES_B2W(p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p)
#define SSE2NEON_AES_U3(p) \
SSE2NEON_AES_B2W(p, p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p))
// Four 256-entry word tables, one per U macro, each generated from the
// S-box data in SSE2NEON_AES_DATA.
static const uint32_t ALIGN_STRUCT(16) aes_table[4][256] = {
SSE2NEON_AES_DATA(SSE2NEON_AES_U0),
SSE2NEON_AES_DATA(SSE2NEON_AES_U1),
SSE2NEON_AES_DATA(SSE2NEON_AES_U2),
SSE2NEON_AES_DATA(SSE2NEON_AES_U3),
};
#undef SSE2NEON_AES_B2W
#undef SSE2NEON_AES_F2
#undef SSE2NEON_AES_F3
#undef SSE2NEON_AES_U0
#undef SSE2NEON_AES_U1
#undef SSE2NEON_AES_U2
#undef SSE2NEON_AES_U3
// Extract the four 32-bit words of the input state.
uint32_t x0 = _mm_cvtsi128_si32(EncBlock);
uint32_t x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0x55));
uint32_t x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0xAA));
uint32_t x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0xFF));
// Each output word XORs one entry from each of the four tables; the
// indices are byte 0, 1, 2 and 3 taken from four different input words.
// _mm_set_epi32 takes the highest word first.
__m128i out = _mm_set_epi32(
(aes_table[0][x3 & 0xff] ^ aes_table[1][(x0 >> 8) & 0xff] ^
aes_table[2][(x1 >> 16) & 0xff] ^ aes_table[3][x2 >> 24]),
(aes_table[0][x2 & 0xff] ^ aes_table[1][(x3 >> 8) & 0xff] ^
aes_table[2][(x0 >> 16) & 0xff] ^ aes_table[3][x1 >> 24]),
(aes_table[0][x1 & 0xff] ^ aes_table[1][(x2 >> 8) & 0xff] ^
aes_table[2][(x3 >> 16) & 0xff] ^ aes_table[3][x0 >> 24]),
(aes_table[0][x0 & 0xff] ^ aes_table[1][(x1 >> 8) & 0xff] ^
aes_table[2][(x2 >> 16) & 0xff] ^ aes_table[3][x3 >> 24]));
return _mm_xor_si128(out, RoundKey);
#endif
}
// Perform the last round of an AES encryption flow on data (state) in a using
// the round key in RoundKey, and store the result in dst.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128
// Software replacement for the x86 aesenclast intrinsic, used when
// __ARM_FEATURE_CRYPTO is not defined. Returns the state `a` after the
// byte permutation and S-box substitution below, XORed with RoundKey.
FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey)
{
    /* FIXME: optimized for NEON */
    // Row r holds result bytes 4r..4r+3. The source byte indices
    // (0, 5, 10, 15, 4, 9, ...) apply the row shift, and each byte is
    // passed through the S-box. The rows are listed in order without C99
    // array designators, which are not standard C++.
    uint8_t v[4][4] = {
        {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 0)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 5)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 10)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 15)]},
        {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 4)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 9)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 14)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 3)]},
        {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 8)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 13)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 2)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 7)]},
        {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 12)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 1)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 6)],
         SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 11)]},
    };
    // Write the substituted bytes back into `a`, XORing in the round key.
    for (int i = 0; i < 16; i++)
        vreinterpretq_nth_u8_m128i(a, i) =
            v[i / 4][i % 4] ^ vreinterpretq_nth_u8_m128i(RoundKey, i);
    return a;
}
// Emits the Advanced Encryption Standard (AES) instruction aeskeygenassist.
// This instruction generates a round key for AES encryption. See
// https://kazakov.life/2017/11/01/cryptocurrency-mining-on-ios-devices/
// for details.
//
// https://msdn.microsoft.com/en-us/library/cc714138(v=vs.120).aspx
// Software replacement for the x86 aeskeygenassist intrinsic, used when
// __ARM_FEATURE_CRYPTO is not defined. Words 1 and 3 of `key` are passed
// through the S-box byte by byte; the result holds each substituted word
// plus a copy rotated right by 8 bits and XORed with rcon.
FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i key, const int rcon)
{
    uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55));
    uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF));
    // Byte-wise views of the two words for the in-place S-box lookup.
    uint8_t *x1_bytes = (uint8_t *) &X1;
    uint8_t *x3_bytes = (uint8_t *) &X3;
    for (int idx = 0; idx < 4; ++idx) {
        x1_bytes[idx] = SSE2NEON_sbox[x1_bytes[idx]];
        x3_bytes[idx] = SSE2NEON_sbox[x3_bytes[idx]];
    }
    const uint32_t rot1 = (X1 >> 8) | (X1 << 24);
    const uint32_t rot3 = (X3 >> 8) | (X3 << 24);
    // _mm_set_epi32 takes the highest word first.
    return _mm_set_epi32(rot3 ^ rcon, X3, rot1 ^ rcon, X1);
}
#undef SSE2NEON_AES_DATA
#else /* __ARM_FEATURE_CRYPTO */
// Implements equivalent of 'aesenc' by combining AESE (with an empty key) and
// AESMC and then manually applying the real key as an xor operation. This
// unfortunately means an additional xor op; the compiler should be able to
// optimize this away for repeated calls however. See
// https://blog.michaelbrase.com/2018/05/08/emulating-x86-aes-intrinsics-on-armv8-a
// for more details.
// aesenc built from the ARM crypto intrinsics: AESE with an all-zero key,
// then AESMC, then an explicit XOR with the round key b.
FORCE_INLINE __m128i _mm_aesenc_si128(__m128i a, __m128i b)
{
    const uint8x16_t zero_key = vdupq_n_u8(0);
    const uint8x16_t after_aese = vaeseq_u8(vreinterpretq_u8_m128i(a), zero_key);
    const uint8x16_t after_aesmc = vaesmcq_u8(after_aese);
    return vreinterpretq_m128i_u8(after_aesmc ^ vreinterpretq_u8_m128i(b));
}
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128
// aesenclast built from the ARM crypto intrinsics: AESE with an all-zero
// key (no AESMC step), followed by an XOR with RoundKey.
FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey)
{
    const uint8x16_t zero_key = vdupq_n_u8(0);
    const uint8x16_t after_aese = vaeseq_u8(vreinterpretq_u8_m128i(a), zero_key);
    return _mm_xor_si128(vreinterpretq_m128i_u8(after_aese), RoundKey);
}
// aeskeygenassist built on the ARM AESE instruction. The bytes of the AESE
// output are picked so that the result holds the substituted words X1 and
// X3, each followed by its byte-rotated copy; rcon is XORed into the two
// rotated words (lanes 1 and 3).
FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
{
    // AESE with an all-zero key performs ShiftRows and SubBytes on a.
    const uint8x16_t zero_key = vdupq_n_u8(0);
    uint8x16_t sub = vaeseq_u8(vreinterpretq_u8_m128i(a), zero_key);
    // The indices below undo the ShiftRows step while extracting X1 and X3.
    uint8x16_t picked = {
        sub[0x4], sub[0x1], sub[0xE], sub[0xB], // SubBytes(X1)
        sub[0x1], sub[0xE], sub[0xB], sub[0x4], // ROT(SubBytes(X1))
        sub[0xC], sub[0x9], sub[0x6], sub[0x3], // SubBytes(X3)
        sub[0x9], sub[0x6], sub[0x3], sub[0xC], // ROT(SubBytes(X3))
    };
    uint32x4_t rcon_lanes = {0, (unsigned) rcon, 0, (unsigned) rcon};
    return vreinterpretq_m128i_u8(picked) ^ vreinterpretq_m128i_u32(rcon_lanes);
}
#endif

Some files were not shown because too many files have changed in this diff Show More