Files
MP-SPDZ/GC/Processor.hpp
2025-12-24 13:47:42 +11:00

583 lines
15 KiB
C++

/*
* Processor.hpp
*
*/
#ifndef GC_PROCESSOR_HPP_
#define GC_PROCESSOR_HPP_
#include <GC/Processor.h>
#include <iostream>
#include <iomanip>
using namespace std;
#include "GC/Program.h"
#include "Access.h"
#include "Processor/FixInput.h"
#include "Math/BitVec.h"
#include "GC/Machine.hpp"
#include "Processor/Processor.hpp"
#include "Processor/IntInput.hpp"
#include "Math/bigint.hpp"
namespace GC
{
/**
 * Construct a processor from a machine.
 *
 * Delegates to the two-argument constructor, passing `machine` as the
 * memories argument and its address as the machine pointer.
 */
template <class T>
Processor<T>::Processor(Machine<T>& machine) :
Processor<T>(machine, &machine)
{
}
/**
 * Construct a processor operating on `memories`.
 *
 * @param memories stored in the `memories` member
 * @param machine  stored as-is in the `machine` member; reset() tests it for
 *                 null before use, so it is presumably optional
 *
 * The program counter `PC`, `time` and `complexity` all start at 0.
 */
template <class T>
Processor<T>::Processor(Memories<T>& memories, Machine<T>* machine) :
machine(machine), memories(memories), PC(0), time(0),
complexity(0)
{
}
/**
 * Destructor.
 *
 * Does nothing unless VERBOSE is defined. In that case it reports on stderr
 * the elapsed XOR timer and the value of `time`, each only when positive.
 */
template<class T>
Processor<T>::~Processor()
{
#ifdef VERBOSE
if (xor_timer.elapsed() > 0)
cerr << "XOR time: " << xor_timer.elapsed() << endl;
if (time > 0)
cerr << "Finished after " << time << " instructions" << endl;
#endif
}
/**
 * Prepare the register files for running `program`.
 *
 * Resizes the S, C and I registers to the counts the program reports for
 * SBIT, CBIT and INT respectively, stores `arg` via set_arg() and rewinds
 * the program counter to 0.
 *
 * @param program object providing num_reg()
 * @param arg     argument handed to set_arg()
 */
template <class T>
template <class U>
void Processor<T>::reset(const U& program, int arg)
{
S.resize(program.num_reg(SBIT));
C.resize(program.num_reg(CBIT));
I.resize(program.num_reg(INT));
set_arg(arg);
PC = 0;
}
/**
 * Reset the processor for `program` with argument 0, and also reset the
 * machine (only if one is attached) and the memories for the same program.
 */
template <class T>
template <class U>
void Processor<T>::reset(const U& program)
{
reset(program, 0);
// The machine pointer may be null, so it is only reset when present.
if (machine)
machine->reset(program);
memories.reset(program);
}
/**
 * Read one input that fits into a 64-bit integer.
 *
 * @param params      params[0] is the bit length (asserted to be at most 64);
 *                    the array is forwarded unchanged to get_long_input()
 * @param interactive forwarded to get_long_input()
 * @return the value obtained via get_long_input<Integer>(), unwrapped with get()
 */
template<class T>
inline long long GC::Processor<T>::get_input(const int* params, bool interactive)
{
    assert(params[0] <= 64);
    auto value = get_long_input<Integer>(params, *this, interactive);
    return value.get();
}
/**
 * Read one input of type U from `input_proc` and range-check it.
 *
 * @param params      params[0] is the bit length used by check_input();
 *                    params[1] selects the reader: 0 uses IntInput, any other
 *                    value uses FixInput_ and passes &params[1] on to it
 * @param input_proc  processor the input is read from
 * @param interactive forwarded to the reader
 * @return the value read, or a value-initialised U when T::actual_inputs is
 *         false (nothing is read in that case)
 * @throws runtime_error (from check_input) if the value is out of range
 */
template<class T>
template<class U>
U GC::Processor<T>::get_long_input(const int* params,
        ProcessorBase& input_proc, bool interactive)
{
    if (not T::actual_inputs)
        return {};

    U res;
    const bool is_integer = (params[1] == 0);
    if (not is_integer)
        res = input_proc.get_input<FixInput_<U>>(interactive,
                &params[1]).items[0];
    else
        res = input_proc.get_input<IntInput<U>>(interactive, 0).items[0];

    check_input(res, params);
    return res;
}
/**
 * Check that `in` fits the bit length given in params[0].
 *
 * For a length of 1 the value must equal 0 or 1. Otherwise the value shifted
 * right by the bit length must equal 0 or -1. params[1] only selects the
 * wording of the error: 0 reports an integer, anything else a fixed-point
 * number with params[1] bits of precision.
 *
 * @throws runtime_error if the value is out of range
 */
template<class T>
template<class U>
void GC::Processor<T>::check_input(const U& in, const int* params)
{
    int n_bits = params[0];
    // Evaluated before branching, exactly as before, so the shift is always done.
    auto test = in >> n_bits;

    if (n_bits == 1)
    {
        if (in == 0 or in == 1)
            return;
        throw runtime_error("input not a bit: " + to_string(in));
    }

    if (test == 0 or test == -1)
        return;

    if (params[1] == 0)
        throw runtime_error(
                "input out of range for a " + std::to_string(n_bits)
                        + "-bit (un)signed integer: " + to_string(in));

    throw runtime_error(
            "input out of range for a " + to_string(n_bits)
                    + "-bit fixed-point number with "
                    + to_string(params[1]) + "-bit precision: "
                    + to_string(
                            mpf_class(bigint(in)) * exp2(-params[1])));
}
/**
 * Bit-decompose `x` into clear registers: the i-th listed register receives
 * bit i of `x` (least significant first).
 */
template <class T>
void Processor<T>::bitdecc(const vector<int>& regs, const Clear& x)
{
    unsigned int bit = 0;
    for (int reg : regs)
    {
        C[reg] = (x >> bit) & 1;
        bit++;
    }
}
/**
 * Bit-decompose `x` into integer registers: the i-th listed register
 * receives bit i of `x` (least significant first).
 */
template <class T>
void Processor<T>::bitdecint(const vector<int>& regs, const Integer& x)
{
    unsigned int bit = 0;
    for (int reg : regs)
    {
        I[reg] = (x >> bit) & 1;
        bit++;
    }
}
/**
 * Load secret registers from dynamic memory using immediate operands.
 *
 * `args` is read in triples. Each triple becomes one ReadAccess built from
 * the secret register S[args[i]], the two following raw values and the
 * `complexity` counter; the batch is then handed to T::load().
 *
 * @throws runtime_error if the argument count is not a multiple of three
 */
template<class T>
template<class U>
void Processor<T>::load_dynamic_direct(const vector<int>& args,
        U& dynamic_memory)
{
    if (args.size() % 3 != 0)
        throw runtime_error("invalid number of arguments");

    vector< ReadAccess<T> > accesses;
    size_t pos = 0;
    while (pos < args.size())
    {
        accesses.push_back(
                {S[args[pos]], args[pos + 1], args[pos + 2], complexity});
        pos += 3;
    }
    T::load(accesses, dynamic_memory);
}
/**
 * Load secret registers from dynamic memory, taking the second operand of
 * each access from a clear register.
 *
 * `args` is read in triples. Each triple becomes one ReadAccess built from
 * S[args[i]], the clear register C[args[i+1]], the raw value args[i+2] and
 * the `complexity` counter; the batch is then handed to T::load().
 *
 * @throws runtime_error if the argument count is not a multiple of three
 */
template<class T>
template<class U>
void GC::Processor<T>::load_dynamic_indirect(const vector<int>& args,
        U& dynamic_memory)
{
    if (args.size() % 3 != 0)
        throw runtime_error("invalid number of arguments");

    vector< ReadAccess<T> > accesses;
    size_t pos = 0;
    while (pos < args.size())
    {
        accesses.push_back(
                {S[args[pos]], C[args[pos + 1]], args[pos + 2], complexity});
        pos += 3;
    }
    T::load(accesses, dynamic_memory);
}
/**
 * Store secret registers to dynamic memory using immediate operands.
 *
 * `args` is read in pairs. Each pair becomes one WriteAccess built from the
 * raw value args[i+1] and the secret register S[args[i]]; the batch is then
 * handed to T::store() and `complexity` is increased.
 *
 * @throws runtime_error if the argument count is odd
 */
template<class T>
template<class U>
void GC::Processor<T>::store_dynamic_direct(const vector<int>& args,
        U& dynamic_memory)
{
    if (args.size() % 2 != 0)
        throw runtime_error("invalid number of arguments");

    vector< WriteAccess<T> > accesses;
    size_t pos = 0;
    while (pos < args.size())
    {
        accesses.push_back({args[pos + 1], S[args[pos]]});
        pos += 2;
    }
    T::store(dynamic_memory, accesses);
    // NOTE(review): `accesses` already has one entry per argument pair, so
    // this counts half of the accesses - confirm that this is intended.
    complexity += accesses.size() / 2 * T::default_length;
}
/**
 * Store secret registers to dynamic memory, taking the first operand of
 * each access from a clear register.
 *
 * `args` is read in pairs. Each pair becomes one WriteAccess built from the
 * clear register C[args[i+1]] and the secret register S[args[i]]; the batch
 * is then handed to T::store() and `complexity` is increased.
 *
 * @throws runtime_error if the argument count is odd
 */
template<class T>
template<class U>
void GC::Processor<T>::store_dynamic_indirect(const vector<int>& args,
        U& dynamic_memory)
{
    if (args.size() % 2 != 0)
        throw runtime_error("invalid number of arguments");

    vector< WriteAccess<T> > accesses;
    size_t pos = 0;
    while (pos < args.size())
    {
        accesses.push_back({C[args[pos + 1]], S[args[pos]]});
        pos += 2;
    }
    T::store(dynamic_memory, accesses);
    // NOTE(review): `accesses` already has one entry per argument pair, so
    // this counts half of the accesses - confirm that this is intended.
    complexity += accesses.size() / 2 * T::default_length;
}
/**
 * Store clear registers to dynamic memory.
 *
 * `args` is validated with check_args(args, 2) and read in pairs. Each pair
 * becomes one ClearWriteAccess built from C[args[i+1]] and C[args[i]]; the
 * batch is then handed to T::store_clear_in_dynamic().
 */
template<class T>
template<class U>
void GC::Processor<T>::store_clear_in_dynamic(const vector<int>& args,
        U& dynamic_memory)
{
    check_args(args, 2);

    vector<ClearWriteAccess> accesses;
    size_t pos = 0;
    while (pos < args.size())
    {
        accesses.push_back({C[args[pos + 1]], C[args[pos]]});
        pos += 2;
    }
    T::store_clear_in_dynamic(dynamic_memory, accesses);
}
/**
 * Copy `n` consecutive elements from `source` to `dest`.
 *
 * The last index touched on each side is validated with check_index()
 * before copying. Elements are copied in ascending order.
 *
 * @param n              number of elements to copy
 * @param dest           destination container
 * @param source         source container
 * @param dest_address   first destination index
 * @param source_address first source index
 */
template<class T>
template<class U, class V>
void Processor<T>::mem_op(int n, U& dest, const V& source,
        Integer dest_address, Integer source_address)
{
    dest.check_index(dest_address + n - 1);
    source.check_index(source_address + n - 1);

    auto to = &dest[dest_address.get()];
    auto from = &source[source_address.get()];
    for (int offset = 0; offset < n; offset++)
        to[offset] = from[offset];
}
/**
 * XOR secret registers for the complete argument list.
 *
 * Forwards to the ranged overload with the range [0, args.size()).
 */
template <class T>
void Processor<T>::xors(const vector<int>& args)
{
xors(args, 0, args.size());
}
/**
 * XOR secret registers for the argument range [start, end).
 *
 * Arguments come in groups of four: bit count, destination register, left
 * operand register, right operand register. A bit count of 1 is handled with
 * a single one-bit XOR. Larger counts are split into units of
 * T::default_length bits that use consecutive registers, the last unit
 * covering only the remaining bits.
 *
 * @param args  flat argument list
 * @param start first index to process (asserted to be a multiple of 4)
 * @param end   one past the last index (asserted to be a multiple of 4,
 *              greater than start and at most args.size())
 */
template <class T>
void Processor<T>::xors(const vector<int>& args, size_t start, size_t end)
{
    assert(start % 4 == 0);
    assert(end % 4 == 0);
    assert(start < end);
    // Compare indices rather than iterators: forming args.begin() + end for
    // an out-of-range end would itself be undefined behaviour.
    assert(end <= args.size());
    int dl = T::default_length;
    for (size_t pos = start; pos < end; pos += 4)
    {
        const int n_bits = args[pos];
        const int dest = args[pos + 1];
        const int lhs = args[pos + 2];
        const int rhs = args[pos + 3];
        if (n_bits == 1)
            S[dest].xor_(1, S[lhs], S[rhs]);
        else
        {
            int n_units = DIV_CEIL(n_bits, dl);
            for (int j = 0; j < n_units; j++)
            {
                // The final unit may hold fewer than dl bits.
                int left = min(dl, n_bits - j * dl);
                S[dest + j].xor_(left, S[lhs + j], S[rhs + j]);
            }
        }
    }
}
/**
 * XOR clear registers.
 *
 * Processes instruction.get_n() bits in units of T::default_length using
 * consecutive registers: destination from get_r(0), operands from get_r(1)
 * and get_r(2). Both operands are masked to the unit's bit count before
 * the XOR.
 */
template<class T>
void Processor<T>::xorc(const ::BaseInstruction& instruction)
{
    const int total = instruction.get_n();
    const int unit = T::default_length;
    const auto dest = instruction.get_r(0);
    const auto left = instruction.get_r(1);
    const auto right = instruction.get_r(2);
    const int n_units = DIV_CEIL(total, unit);
    for (int i = 0; i < n_units; i++)
    {
        // The final unit may hold fewer than `unit` bits.
        int n = min(unit, total - i * unit);
        auto lhs = BitVec(C[left + i]).mask(n);
        auto rhs = BitVec(C[right + i]).mask(n);
        C[dest + i] = lhs ^ rhs;
    }
}
/**
 * Invert secret registers.
 *
 * Processes instruction.get_n() bits in units of T::default_length using
 * consecutive registers: destination from get_r(0), source from get_r(1).
 */
template<class T>
void Processor<T>::nots(const ::BaseInstruction& instruction)
{
    const int total = instruction.get_n();
    const int unit = T::default_length;
    const auto dest = instruction.get_r(0);
    const auto source = instruction.get_r(1);
    const int n_units = DIV_CEIL(total, unit);
    for (int i = 0; i < n_units; i++)
    {
        // The final unit may hold fewer than `unit` bits.
        int n = min(unit, total - i * unit);
        S[dest + i].invert(n, S[source + i]);
    }
}
/**
 * Invert clear registers.
 *
 * Processes instruction.get_n() bits in units of Clear::N_BITS using
 * consecutive registers: destination from get_r(0), source from get_r(1).
 * Each result is the bitwise complement of the source value masked to the
 * unit's bit count.
 */
template<class T>
void Processor<T>::notcb(const ::BaseInstruction& instruction)
{
    const int total = instruction.get_n();
    const int unit = Clear::N_BITS;
    const auto dest = instruction.get_r(0);
    const auto source = instruction.get_r(1);
    const int n_units = DIV_CEIL(total, unit);
    for (int i = 0; i < n_units; i++)
    {
        // The final unit may hold fewer than `unit` bits.
        int n = min(unit, total - i * unit);
        C[dest + i] = Clear(~C[source + i].get()).mask(n);
    }
}
/**
 * Copy secret registers.
 *
 * Copies whole registers from get_r(1) onwards to get_r(0) onwards. At least
 * one register is always copied; otherwise the count is get_n() divided by
 * T::default_length, rounded up.
 */
template<class T>
void Processor<T>::movsb(const ::BaseInstruction& instruction)
{
    const auto n_bits = instruction.get_n();
    const int n_blocks =
            n_bits < unsigned(T::default_length) ?
                    1 : DIV_CEIL(n_bits, T::default_length);
    const auto dest = instruction.get_r(0);
    const auto source = instruction.get_r(1);
    for (int i = 0; i < n_blocks; i++)
        S[dest + i] = S[source + i];
}
/**
 * AND secret registers with clear registers.
 *
 * For each unit of T::default_length bits in get_n(), the secret register
 * from get_r(1) is combined with the clear register from get_r(2) and the
 * result is stored in the secret register from get_r(0).
 */
template<class T>
void Processor<T>::andm(const ::BaseInstruction& instruction)
{
    const auto n_units = DIV_CEIL(instruction.get_n(), T::default_length);
    const auto dest = instruction.get_r(0);
    const auto secret = instruction.get_r(1);
    const auto clear = instruction.get_r(2);
    for (int i = 0; i < n_units; i++)
        S[dest + i] = S[secret + i] & C[clear + i];
}
/**
 * AND secret registers.
 *
 * Arguments come in groups of four (validated with check_args): bit count,
 * destination register, left operand register, right operand register. Each
 * group is processed in units of T::default_length bits on consecutive
 * registers. With `repeat` set, the right operand register is not advanced
 * between units and the flag is passed on to T::and_(). `complexity` grows
 * by the bit count of every group.
 */
template <class T>
void Processor<T>::and_(const vector<int>& args, bool repeat)
{
    check_args(args, 4);
    for (size_t pos = 0; pos < args.size(); pos += 4)
    {
        const int n_bits = args[pos];
        const int dest = args[pos + 1];
        const int left = args[pos + 2];
        const int right = args[pos + 3];
        for (int j = 0; j < DIV_CEIL(n_bits, T::default_length); j++)
        {
            // The final unit may hold fewer than default_length bits.
            int n = min(T::default_length, n_bits - j * T::default_length);
            const int right_offset = repeat ? 0 : j;
            S[dest + j].and_(n, S[left + j], S[right + right_offset], repeat);
        }
        complexity += n_bits;
    }
}
/**
 * Bitwise AND of several secret vectors with one common secret vector.
 *
 * `args` is a sequence of groups laid out as
 *   [group length, size, dest_0..dest_{k-1}, base, x_0..x_{k-1}]
 * where k = (group length - 3) / 2. For every bit position i < size, bit i
 * of `base` is ANDed with bit i of each x_j and written to bit i of dest_j.
 * Vectors longer than T::default_length span consecutive registers.
 */
template <class T>
void Processor<T>::andrsvec(const vector<int>& args)
{
int N_BITS = T::default_length;
auto it = args.begin();
while (it < args.end())
{
// Group header: total argument count of the group, then the vector size.
int n_args = (*it++ - 3) / 2;
int size = *it++;
// `it` now points at the destination list; the common operand follows it.
int base = *(it + n_args);
for (int i = 0; i < size; i += 1)
{
// At the start of each register, size all destinations for the bits
// that remain (at most N_BITS).
if (i % N_BITS == 0)
for (int j = 0; j < n_args; j++)
S.at(*(it + j) + i / N_BITS).resize_regs(
min(N_BITS, size - i));
// Single-bit value holding bit i of the common operand.
T y;
y.get_regs().push_back(S.at(base + i / N_BITS).get_reg(i % N_BITS));
for (int j = 0; j < n_args; j++)
{
// Single-bit value holding bit i of the j-th operand.
T x, tmp;
x.get_regs().push_back(
S.at(*(it + n_args + 1 + j) + i / N_BITS).get_reg(
i % N_BITS));
tmp.and_(1, x, y, false);
S.at(*(it + j) + i / N_BITS).get_reg(i % N_BITS) = tmp.get_reg(0);
}
}
// Skip destinations, common operand and operands to reach the next group.
it += 2 * n_args + 1;
}
}
/**
 * Read secret inputs into registers.
 *
 * `args` is interpreted through InputArgList. For every entry the value
 * produced by T::input() is stored in the secret register named by the
 * entry's `dest` field.
 *
 * With DEBUG_INPUT defined, the destination register and its address are
 * printed to stdout for every input.
 */
template <class T>
void Processor<T>::input(const vector<int>& args)
{
    InputArgList a(args);
    for (auto x : a)
    {
        S[x.dest] = T::input(*this, x);
#ifdef DEBUG_INPUT
        // Uses the entry's own destination; the previous code referred to an
        // undeclared index variable and did not compile with DEBUG_INPUT.
        cout << "input to " << x.dest << "/" << &S[x.dest] << endl;
#endif
    }
}
/**
 * Reveal secret registers into clear registers.
 *
 * Arguments come in groups of three: bit count, clear destination register,
 * secret source register. The number of registers processed follows
 * T::default_length while the per-register bit count follows Clear::N_BITS,
 * so values wider than both units require the two to be equal (asserted).
 */
template <class T>
void Processor<T>::reveal(const vector<int>& args)
{
    const int clear_bits = Clear::N_BITS;
    for (size_t pos = 0; pos < args.size(); pos += 3)
    {
        const int n = args[pos];
        const int clear_reg = args[pos + 1];
        const int secret_reg = args[pos + 2];
        if (n > max(T::default_length, Clear::N_BITS))
            assert(T::default_length == Clear::N_BITS);
        const int n_units = DIV_CEIL(n, T::default_length);
        for (int i = 0; i < n_units; i++)
        {
            // The final unit may hold fewer than clear_bits bits.
            const int n_bits = min(clear_bits, n - i * clear_bits);
            S[secret_reg + i].reveal(n_bits, C[clear_reg + i]);
        }
    }
}
/**
 * Convert clear registers to secret registers.
 *
 * For each unit of Clear::N_BITS bits in get_n(), the clear register from
 * get_r(1) is turned into a secret value with T::constant(), using the
 * current ShareThread's player number and MAC key share, and stored in the
 * secret register from get_r(0).
 */
template <class T>
template <int>
void Processor<T>::convcbit2s(const BaseInstruction& instruction)
{
    const int unit = GC::Clear::N_BITS;
    auto& share_thread = ShareThread<T>::s();
    const auto n_total = instruction.get_n();
    const auto dest = instruction.get_r(0);
    const auto source = instruction.get_r(1);
    for (int i = 0; i < DIV_CEIL(n_total, unit); i++)
    {
        // The final unit may hold fewer than `unit` bits.
        const auto n_bits = min(size_t(unit), n_total - i * unit);
        S[dest + i] = T::constant(C[source + i],
                share_thread.P->my_num(), share_thread.MC->get_alphai(),
                n_bits);
    }
}
/**
 * Expand clear bit registers into a vector of integer registers, one bit
 * per integer.
 *
 * Reads get_n() bits starting at clear register get_r(1), synchronises them
 * between parties where applicable, and writes them to `Ci` starting at
 * get_r(0).
 *
 * @param instruction supplies the bit count and register indices
 * @param Ci          destination integer registers
 * @param P           player used for the fallback synchronisation; may be null
 */
template<class T>
void Processor<T>::convcbitvec(const BaseInstruction& instruction,
StackedVector<Integer>& Ci, Player* P)
{
vector<Integer> bits;
auto n = instruction.get_n();
bits.reserve(n);
// Bit i lives in register i / N_BITS at position i % N_BITS.
for (size_t i = 0; i < instruction.get_n(); i++)
{
int i1 = i / GC::Clear::N_BITS;
int i2 = i % GC::Clear::N_BITS;
auto bit = C[instruction.get_r(1) + i1].get_bit(i2);
bits.push_back(bit);
}
try
{
auto proto = ShareThread<T>::s().protocol;
// Note: this local P shadows the function parameter inside the try block.
auto P = ShareThread<T>::s().P;
// The default use case in the compiler doesn't require synchronization
// with function-dependent protocols, but testing does.
if (proto and OnlineOptions::singleton.has_option("convcbitvec_sync"))
proto->sync(bits, *P);
else
// Reuse the handler below as the common fallback path.
throw no_singleton();
}
catch (no_singleton&)
{
// Fallback: here P is the function parameter again.
if (P)
ProtocolBase<T>::sync(bits, *P);
else if (not T::symmetric)
ProtocolBase<T>::sync(bits, *Thread<T>::s().P);
}
auto dest = Ci.iterator_for_size(instruction.get_r(0), n);
for (auto& bit : bits)
*dest++ = bit;
}
/**
 * Print `size` consecutive clear registers starting at `reg` as one
 * hexadecimal number, followed by a label.
 *
 * Registers are printed from the highest index down to `reg`, each as 16
 * zero-padded hex digits. The label is produced by print_str(n).
 */
template <class T>
void Processor<T>::print_reg(int reg, int n, int size)
{
#ifdef DEBUG_VALUES
    cout << "print_reg " << typeid(T).name() << " " << reg << " " << &C[reg] << endl;
#endif
    out << "Reg[" << reg << "] = 0x" << hex << noshowbase;
    for (int idx = reg + size - 1; idx >= reg; idx--)
    {
        out.fill('0');
        out.width(16);
        out << static_cast<unsigned long>(C[idx].get());
    }
    out << dec << " # ";
    print_str(n);
    out << endl << flush;
}
/**
 * Print `value` in hexadecimal with a base prefix, then switch the output
 * back to decimal and flush it.
 */
template <class T>
template <class U>
void Processor<T>::print_reg_plain(U& value)
{
out << hex << showbase << value << dec << flush;
}
/**
 * Print a clear value of `n_bits` bits as a signed decimal number.
 *
 * @param n_bits width of the value in bits
 * @param reg    index of the first clear register holding the value
 *
 * Values that fit into one clear register are printed from that register;
 * wider values are assembled from consecutive registers into a bigint.
 */
template <class T>
void Processor<T>::print_reg_signed(unsigned n_bits, Integer reg)
{
if (n_bits <= Clear::N_BITS)
{
auto value = C[reg.get()];
unsigned n_shift = 0;
// Shift the top bit of the n_bits-wide value up to the top bit of the
// native type and back down again; presumably this sign-extends, assuming
// get() returns a signed type with arithmetic right shift - TODO confirm.
// Single-bit values are not shifted.
if (n_bits > 1)
n_shift = sizeof(value.get()) * 8 - n_bits;
// Guard against an out-of-range shift amount (e.g. n_bits == 0).
if (n_shift > 63)
n_shift = 0;
out << dec << (value.get() << n_shift >> n_shift) << flush;
}
else
{
// Assemble the registers as unsigned limbs, least significant first.
bigint tmp = 0;
for (int i = 0; i < DIV_CEIL(n_bits, Clear::N_BITS); i++)
{
tmp += bigint((unsigned long)C[reg + i].get()) << (i * Clear::N_BITS);
}
// Two's complement: values with the top bit set are negative.
if (tmp >= bigint(1) << (n_bits - 1))
tmp -= bigint(1) << n_bits;
out << dec << tmp << flush;
}
}
/**
 * Print `n` converted to a single character and flush the output.
 */
template <class T>
void Processor<T>::print_chr(int n)
{
    const char character = static_cast<char>(n);
    out << character << flush;
}
/**
 * Print the bytes of `n` as a string of sizeof(int) characters, in memory
 * order, and flush the output.
 */
template <class T>
void Processor<T>::print_str(int n)
{
    const char* raw_bytes = reinterpret_cast<const char*>(&n);
    const string text(raw_bytes, sizeof(n));
    out << text << flush;
}
/**
 * Print a floating-point number held in clear registers.
 *
 * The clear registers named by args[0..3] are passed to bigint::get_float()
 * and the result is written with bigint::output_float(), which also receives
 * the clear register named by args[4].
 */
template <class T>
void Processor<T>::print_float(const vector<int>& args)
{
bigint::output_float(out,
bigint::get_float(C[args[0]], C[args[1]], C[args[2]], C[args[3]]),
C[args[4]]);
}
/**
 * Set the floating-point output precision of `out` to `n` digits.
 */
template <class T>
void Processor<T>::print_float_prec(int n)
{
out << setprecision(n);
}
/**
 * Fill consecutive integer registers with a stepped sequence.
 *
 * Writes get_size() registers starting at get_r(0). Register i receives
 * base + ((i / start[0]) % start[1]) * get_n(), where base is the value of
 * integer register get_r(1) and `start` is instruction.get_start().
 */
template<class T>
void Processor<T>::incint(const BaseInstruction& instruction)
{
    auto dest = &I[instruction.get_r(0)];
    auto base = I[instruction.get_r(1)];
    auto& start = instruction.get_start();
    for (int i = 0; i < instruction.get_size(); i++)
    {
        const int step_index = (i / start[0]) % start[1];
        dest[i] = base + step_index * int(instruction.get_n());
    }
}
/**
 * Push a new stack frame on the secret (S) and clear (C) register files.
 * The integer registers (I) are not touched here.
 */
template<class T>
void GC::Processor<T>::push_stack()
{
S.push_stack();
C.push_stack();
}
/**
 * Pass `args` to push_args() of the secret and clear register files, tagged
 * with SBIT and CBIT respectively.
 */
template<class T>
void GC::Processor<T>::push_args(const vector<int>& args)
{
S.push_args(args, SBIT);
C.push_args(args, CBIT);
}
/**
 * Pop the current stack frame of the secret and clear register files,
 * passing `results` tagged with SBIT and CBIT respectively.
 */
template<class T>
void GC::Processor<T>::pop_stack(const vector<int>& results)
{
S.pop_stack(results, SBIT);
C.pop_stack(results, CBIT);
}
/**
 * Run another tape as a subroutine and return to the current one.
 *
 * The tape index is get_r(0); its argument is read from integer register
 * get_r(1). instruction.get_start() lists the registers passed to the tape
 * and received back from it.
 *
 * @throws runtime_error if T::garbled is set
 *
 * NOTE(review): `machine` is dereferenced without a null check, and the
 * saved state is not restored if execute() throws - confirm both are
 * acceptable for all callers.
 */
template<class T>
template<class U>
void Processor<T>::call_tape(const BaseInstruction& instruction, U& dynamic_memory)
{
if (T::garbled)
throw runtime_error(
"calling tapes not supported with garbled circuits, "
"compile with '--garbled'");
// Read the argument before the register files are pushed and resized.
auto new_arg = I.at(instruction.get_r(1)).get();
// Save the caller's program counter and argument.
PC_stack.push_back(PC);
arg_stack.push_back(this->arg);
push_stack();
I.push_stack();
auto& tape = machine->progs.at(instruction.get_r(0));
// Resize the registers for the callee, set its argument and rewind PC.
reset(tape, new_arg);
auto& args = instruction.get_start();
push_args(args);
I.push_args(args, INT);
tape.execute(*this, dynamic_memory, PC);
pop_stack(args);
I.pop_stack(args, INT);
// Restore the caller's program counter and argument.
PC = PC_stack.back();
PC_stack.pop_back();
this->arg = arg_stack.back();
arg_stack.pop_back();
}
} /* namespace GC */
#endif