Reduce memory usage in matrix multiplication with Beaver-based protocols.

This commit is contained in:
Marcel Keller
2023-03-28 11:28:28 +11:00
parent 0e042485b6
commit 6e0c4ea66b
2 changed files with 45 additions and 26 deletions

View File

@@ -36,9 +36,9 @@ class SubProcessor
void resize(size_t size) { C.resize(size); S.resize(size); }
void matmulsm_prep(int i, const CheckVector<T>& source,
void matmulsm_prep(int ii, int j, const CheckVector<T>& source,
const vector<int>& dim, size_t a, size_t b);
void matmulsm_finalize(int i, const vector<int>& dim,
void matmulsm_finalize(int i, int j, const vector<int>& dim,
typename vector<T>::iterator C);
template<class sint, class sgf2n> friend class Processor;

View File

@@ -583,49 +583,68 @@ void SubProcessor<T>::matmulsm(const CheckVector<T>& source,
assert(Proc);
int base = 0;
int base2 = 0;
protocol.init_dotprod();
for (int i = 0; i < dim[0]; i++)
{
matmulsm_prep(i, source, dim, a, b);
if (protocol.get_buffer_size() > OnlineOptions::singleton.batch_size)
auto ii = Proc->get_Ci().at(dim[3] + i);
for (int j = 0; j < dim[2]; j++)
{
protocol.exchange();
for (int j = base; j <= i; j++)
matmulsm_finalize(j, dim, C);
base = i + 1;
protocol.init_dotprod();
#ifdef DEBUG_MATMULSM
cerr << "matmulsm prep " << i << " " << j << endl;
#endif
matmulsm_prep(ii, j, source, dim, a, b);
if (protocol.get_buffer_size() > OnlineOptions::singleton.batch_size)
{
#ifdef DEBUG_MATMULSM
cerr << "matmulsm round " << protocol.get_buffer_size() << endl;
#endif
protocol.exchange();
if (base < i)
for (int l = base2; l < dim[2]; l++)
matmulsm_finalize(base, l, dim, C);
for (int k = base + 1; k < i; k++)
for (int l = 0; l < dim[2]; l++)
matmulsm_finalize(k, l, dim, C);
for (int l = base < i ? 0 : base2; l <= j; l++)
matmulsm_finalize(i, l, dim, C);
base = i;
base2 = j + 1;
protocol.init_dotprod();
}
}
}
protocol.exchange();
for (int i = base; i < dim[0]; i++)
matmulsm_finalize(i, dim, C);
for (int j = base2; j < dim[2]; j++)
matmulsm_finalize(base, j, dim, C);
for (int i = base + 1; i < dim[0]; i++)
for (int j = 0; j < dim[2]; j++)
matmulsm_finalize(i, j, dim, C);
}
template<class T>
void SubProcessor<T>::matmulsm_prep(int i, const CheckVector<T>& source,
void SubProcessor<T>::matmulsm_prep(int ii, int j, const CheckVector<T>& source,
const vector<int>& dim, size_t a, size_t b)
{
auto ii = Proc->get_Ci().at(dim[3] + i);
for (int j = 0; j < dim[2]; j++)
auto jj = Proc->get_Ci().at(dim[6] + j);
for (int k = 0; k < dim[1]; k++)
{
auto jj = Proc->get_Ci().at(dim[6] + j);
for (int k = 0; k < dim[1]; k++)
{
auto kk = Proc->get_Ci().at(dim[4] + k);
auto ll = Proc->get_Ci().at(dim[5] + k);
protocol.prepare_dotprod(source.at(a + ii * dim[7] + kk),
source.at(b + ll * dim[8] + jj));
}
protocol.next_dotprod();
auto kk = Proc->get_Ci().at(dim[4] + k);
auto ll = Proc->get_Ci().at(dim[5] + k);
protocol.prepare_dotprod(source.at(a + ii * dim[7] + kk),
source.at(b + ll * dim[8] + jj));
}
protocol.next_dotprod();
}
template<class T>
void SubProcessor<T>::matmulsm_finalize(int i, const vector<int>& dim,
void SubProcessor<T>::matmulsm_finalize(int i, int j, const vector<int>& dim,
typename vector<T>::iterator C)
{
for (int j = 0; j < dim[2]; j++)
*(C + i * dim[2] + j) = protocol.finalize_dotprod(dim[1]);
#ifdef DEBUG_MATMULSM
cerr << "matmulsm finalize " << i << " " << j << endl;
#endif
*(C + i * dim[2] + j) = protocol.finalize_dotprod(dim[1]);
}
template<class T>