/* sss/hazmat.c */

#include "hazmat.h"
#include "tweetnacl.h"
#include <assert.h>
#include <string.h>

/*
 * A pair of single-byte values `x` and `y`.
 *
 * NOTE(review): presumably one byte of a share (an x coordinate together
 * with the polynomial's value at that point). Nothing in the visible part of
 * this file uses this type -- confirm whether it is still needed.
 */
typedef struct {
	uint8_t x;
	uint8_t y;
} ByteShare;


/*
 * Defined in another translation unit. This file calls it from
 * sss_create_keyshares to expand the 32-byte key into the non-constant
 * polynomial coefficients.
 *
 * NOTE(review): presumably SHAKE256 as specified in FIPS 202, reading
 * `inLen` bytes from `in` and writing `outLen` bytes to `out` -- confirm
 * against the implementation that is linked in.
 */
extern void FIPS202_SHAKE256(const unsigned char *in, unsigned long long inLen,
	                     unsigned char *out, unsigned long long outLen);


/*
 * Transpose 32 bytes into bitsliced form.
 *
 * After the call, bit `i` of word `r[b]` equals bit `b` of byte `x[i]`, so
 * each of the eight output words collects one bit position of all 32 bytes.
 */
static void bitslice(uint32_t r[8], const uint8_t x[32])
{
	size_t lane, bit;

	/* Start from an all-zero result; bits are OR-ed in below. */
	for (bit = 0; bit < 8; bit++) {
		r[bit] = 0;
	}

	for (lane = 0; lane < 32; lane++) {
		const uint32_t byte = x[lane];

		for (bit = 0; bit < 8; bit++) {
			/* Isolate bit `bit` of this byte and park it in lane `lane`. */
			r[bit] |= ((byte >> bit) & 1u) << lane;
		}
	}
}


/*
 * Transpose a bitsliced value back into 32 bytes (the inverse of bitslice).
 *
 * After the call, bit `b` of byte `r[i]` equals bit `i` of word `x[b]`.
 */
static void unbitslice(uint8_t r[32], const uint32_t x[8])
{
	size_t bit_idx, arr_idx;
	uint32_t cur;

	memset(r, 0, sizeof(uint8_t[32]));
	for (bit_idx = 0; bit_idx < 8; bit_idx++) {
		cur = x[bit_idx];
		for (arr_idx = 0; arr_idx < 32; arr_idx++) {
			/*
			 * Shift the unsigned word down and mask with 1 instead of
			 * building the mask `1 << arr_idx`: that mask has type int,
			 * and shifting it by 31 is undefined behavior.
			 */
			r[arr_idx] |= (uint8_t) (((cur >> arr_idx) & 1u) << bit_idx);
		}
	}
}


/*
 * Broadcast the single byte `x` to all 32 lanes of a bitsliced value: word
 * `r[b]` becomes all ones when bit `b` of `x` is set and all zeros otherwise.
 */
static void bitslice_setall(uint32_t r[8], const uint8_t x)
{
	size_t idx;
	for (idx = 0; idx < 8; idx++) {
		/*
		 * Extract the bit as an unsigned 0 or 1 and negate it in unsigned
		 * arithmetic: 0 - 1 wraps to 0xFFFFFFFF, 0 - 0 stays 0. This avoids
		 * shifting a signed int into its sign bit and right-shifting a
		 * negative value, and it needs no branch on the bit.
		 */
		r[idx] = (uint32_t) 0 - (((uint32_t) x >> idx) & 1u);
	}
}


/*
 * In-place addition of two bitsliced values: every word of `x` is XOR-ed
 * into the matching word of `r`.
 */
static void gf256_add(uint32_t r[8], const uint32_t x[8])
{
	size_t remaining = 8;

	/* The words are independent, so the order of traversal does not matter. */
	while (remaining-- > 0) {
		r[remaining] ^= x[remaining];
	}
}


/*
 * Multiply two bitsliced polynomials in GF(2^8), reduced by
 * x^8 + x^4 + x^3 + x + 1, and store the product in `r`.
 *
 * Word `i` of each operand holds the coefficient of x^i for 32 independent
 * field elements (one per bit), so one call performs 32 multiplications.
 * The body consists only of AND/XOR operations on whole words; it contains
 * no branch or table lookup that depends on the operand values.
 *
 * `r` may alias `a` and/or `b`: both inputs are copied into local buffers
 * before `r` is cleared.
 */
static void gf256_mul(uint32_t r[8], const uint32_t a[8], const uint32_t b[8])
{
	/* This function implements Russian Peasant multiplication on two
	 * bitsliced polynomials.
	 *
	 * There are eight rounds, one per coefficient of `b`. Each round has
	 * two steps:
	 *
	 *   add:    XOR the current multiple of `a` into `r` wherever the
	 *           matching coefficient of `b` is set.
	 *   reduce: multiply `a2` by x for the next round. The words are not
	 *           moved; instead the indices rotate by one, so the word that
	 *           held the x^7 coefficient becomes the new constant term.
	 *           That same word is XOR-ed into the words that become the
	 *           new x^1, x^3 and x^4 coefficients, which together apply
	 *           x^8 = x^4 + x^3 + x + 1.
	 *
	 * A double for loop would be shorter and arguably nicer to read.
	 * However, some compilers seem to fail in optimizing these kinds of
	 * loops, so the rounds are written out by hand.
	 */
	uint32_t a2[8], b2[8];

	/* Copy the inputs first so that `r` may safely alias `a` or `b`. */
	memcpy(a2, a, sizeof(uint32_t[8]));
	memcpy(b2, b, sizeof(uint32_t[8]));
	memset(r, 0, sizeof(uint32_t[8]));

	/* Round 0: a * x^0; constant term lives in a2[0]. */
	r[0] ^= a2[0] & b2[0]; /* add */
	r[1] ^= a2[1] & b2[0];
	r[2] ^= a2[2] & b2[0];
	r[3] ^= a2[3] & b2[0];
	r[4] ^= a2[4] & b2[0];
	r[5] ^= a2[5] & b2[0];
	r[6] ^= a2[6] & b2[0];
	r[7] ^= a2[7] & b2[0];
	a2[0] ^= a2[7]; /* reduce */
	a2[2] ^= a2[7];
	a2[3] ^= a2[7];

	/* Round 1: a * x^1; constant term now lives in a2[7]. */
	r[0] ^= a2[7] & b2[1]; /* add */
	r[1] ^= a2[0] & b2[1];
	r[2] ^= a2[1] & b2[1];
	r[3] ^= a2[2] & b2[1];
	r[4] ^= a2[3] & b2[1];
	r[5] ^= a2[4] & b2[1];
	r[6] ^= a2[5] & b2[1];
	r[7] ^= a2[6] & b2[1];
	a2[7] ^= a2[6]; /* reduce */
	a2[1] ^= a2[6];
	a2[2] ^= a2[6];

	/* Round 2: a * x^2; constant term now lives in a2[6]. */
	r[0] ^= a2[6] & b2[2]; /* add */
	r[1] ^= a2[7] & b2[2];
	r[2] ^= a2[0] & b2[2];
	r[3] ^= a2[1] & b2[2];
	r[4] ^= a2[2] & b2[2];
	r[5] ^= a2[3] & b2[2];
	r[6] ^= a2[4] & b2[2];
	r[7] ^= a2[5] & b2[2];
	a2[6] ^= a2[5]; /* reduce */
	a2[0] ^= a2[5];
	a2[1] ^= a2[5];

	/* Round 3: a * x^3; constant term now lives in a2[5]. */
	r[0] ^= a2[5] & b2[3]; /* add */
	r[1] ^= a2[6] & b2[3];
	r[2] ^= a2[7] & b2[3];
	r[3] ^= a2[0] & b2[3];
	r[4] ^= a2[1] & b2[3];
	r[5] ^= a2[2] & b2[3];
	r[6] ^= a2[3] & b2[3];
	r[7] ^= a2[4] & b2[3];
	a2[5] ^= a2[4]; /* reduce */
	a2[7] ^= a2[4];
	a2[0] ^= a2[4];

	/* Round 4: a * x^4; constant term now lives in a2[4]. */
	r[0] ^= a2[4] & b2[4]; /* add */
	r[1] ^= a2[5] & b2[4];
	r[2] ^= a2[6] & b2[4];
	r[3] ^= a2[7] & b2[4];
	r[4] ^= a2[0] & b2[4];
	r[5] ^= a2[1] & b2[4];
	r[6] ^= a2[2] & b2[4];
	r[7] ^= a2[3] & b2[4];
	a2[4] ^= a2[3]; /* reduce */
	a2[6] ^= a2[3];
	a2[7] ^= a2[3];

	/* Round 5: a * x^5; constant term now lives in a2[3]. */
	r[0] ^= a2[3] & b2[5]; /* add */
	r[1] ^= a2[4] & b2[5];
	r[2] ^= a2[5] & b2[5];
	r[3] ^= a2[6] & b2[5];
	r[4] ^= a2[7] & b2[5];
	r[5] ^= a2[0] & b2[5];
	r[6] ^= a2[1] & b2[5];
	r[7] ^= a2[2] & b2[5];
	a2[3] ^= a2[2]; /* reduce */
	a2[5] ^= a2[2];
	a2[6] ^= a2[2];

	/* Round 6: a * x^6; constant term now lives in a2[2]. */
	r[0] ^= a2[2] & b2[6]; /* add */
	r[1] ^= a2[3] & b2[6];
	r[2] ^= a2[4] & b2[6];
	r[3] ^= a2[5] & b2[6];
	r[4] ^= a2[6] & b2[6];
	r[5] ^= a2[7] & b2[6];
	r[6] ^= a2[0] & b2[6];
	r[7] ^= a2[1] & b2[6];
	a2[2] ^= a2[1]; /* reduce */
	a2[4] ^= a2[1];
	a2[5] ^= a2[1];

	/* Round 7: a * x^7; constant term now lives in a2[1]. No reduce
	 * step follows because there is no further round. */
	r[0] ^= a2[1] & b2[7]; /* add */
	r[1] ^= a2[2] & b2[7];
	r[2] ^= a2[3] & b2[7];
	r[3] ^= a2[4] & b2[7];
	r[4] ^= a2[5] & b2[7];
	r[5] ^= a2[6] & b2[7];
	r[6] ^= a2[7] & b2[7];
	r[7] ^= a2[0] & b2[7];
}


/*
 * Invert `x` in GF(2^8) and write the result to `r`.
 *
 * The inverse is computed as x^254, using the fact that x^255 == 1 for every
 * nonzero element of the field. A zero input lane yields a zero output lane,
 * since every product involving it is zero.
 *
 * `x` is only read. `r` must not alias `x`: `x` is read again after `r` has
 * been overwritten, and the initial memcpy requires non-overlapping buffers.
 */
static void gf256_inv(uint32_t r[8], const uint32_t x[8])
{
	size_t idx;

	memcpy(r, x, sizeof(uint32_t[8]));
	/* Use square-multiply to calculate a^254. Each pass maps the exponent
	 * e to 2e + 1, so six passes take it 1 -> 3 -> 7 -> ... -> 127. */
	for (idx = 0; idx < 6; idx++) {
		/* TODO(dsprenkels) Optimize for squaring */
		gf256_mul(r, r, r);
		gf256_mul(r, r, x);
	}
	/* One final squaring: 127 -> 254. */
	gf256_mul(r, r, r);
}


/*
 * Create `n` key shares of the key given in `key`, using a polynomial with
 * `k` coefficients (the key is the constant term). The caller has to ensure
 * that the array `out` has enough space to hold at least `n` sss_Keyshare
 * structs.
 *
 * Share `i` (0-based) gets x value `i + 1` and, as its y value, the
 * polynomial evaluated at that x for each of the 32 key bytes.
 *
 * Requirements: `n != 0`, `k != 0` and `k <= n`. These are checked with
 * assert() only, so the checks disappear when NDEBUG is defined.
 *
 * The non-constant coefficients are taken from FIPS202_SHAKE256 applied to
 * the key itself, so the same `key` and `k` always produce the same shares.
 * NOTE(review): confirm that deriving the coefficients from the key (rather
 * than from fresh randomness) is intended.
 *
 * Intermediate buffers, including the bitsliced key, are not wiped before
 * returning.
 */
void sss_create_keyshares(sss_Keyshare *out,
                          const uint8_t key[32],
                          uint8_t n,
                          uint8_t k)
{
	/* Check if the parameters are valid */
	assert(n != 0);
	assert(k != 0);
	assert(k <= n);

	uint8_t share_idx, coeff_idx, unbitsliced_x;
	/*
	 * poly[0] is the constant term (the secret); poly[1..k-1] are the
	 * derived coefficients. Sizing the array by `k` rather than `k - 1`
	 * keeps it from becoming a zero-length VLA when k == 1.
	 */
	uint32_t poly[k][8], x[8], y[8], xpow[8], tmp[8];

	/* Put the secret in the bottom part of the polynomial */
	bitslice(poly[0], key);

	/* Generate the other terms of the polynomial (none when k == 1) */
	if (k > 1) {
		FIPS202_SHAKE256(key, 32, (void*) poly[1],
		                 (unsigned long long) (k - 1) * sizeof(poly[0]));
	}

	for (share_idx = 0; share_idx < n; share_idx++) {
		/* x value is in 1..n */
		unbitsliced_x = share_idx + 1;
		out[share_idx].x = unbitsliced_x;
		bitslice_setall(x, unbitsliced_x);

		/* Calculate y, starting from the constant term */
		memcpy(y, poly[0], sizeof(y));
		/* xpow holds x^coeff_idx; start at x^0 == 1 in every lane */
		memset(xpow, 0, sizeof(xpow));
		xpow[0] = ~(uint32_t) 0;
		for (coeff_idx = 1; coeff_idx < k; coeff_idx++) {
			gf256_mul(xpow, xpow, x);
			gf256_mul(tmp, xpow, poly[coeff_idx]);
			gf256_add(y, tmp);
		}
		unbitslice(out[share_idx].y, y);
	}
}


/*
 * Restore the `k` sss_Keyshare structs given in `key_shares` and write the
 * result to `key`.
 *
 * The secret is recovered by Lagrange interpolation at x == 0 over GF(2^8),
 * carried out for all 32 key bytes at once in bitsliced form.
 *
 * Requirements: `k != 0` (checked with assert() only, so the check
 * disappears when NDEBUG is defined). The x values of the given shares must
 * be pairwise distinct: two equal x values make a denominator zero, whose
 * computed "inverse" is zero, and the output is then not the original key.
 * This function does not detect that case, nor can it tell whether too few
 * or modified shares were supplied.
 *
 * Intermediate buffers, including the bitsliced secret, are not wiped before
 * returning.
 */
void sss_combine_keyshares(uint8_t key[32],
                           const sss_Keyshare *key_shares,
                           uint8_t k)
{
	/* A zero-length VLA is undefined behavior, so reject k == 0 up front */
	assert(k != 0);

	size_t share_idx, idx1, idx2;
	uint32_t xs[k][8], ys[k][8];
	uint32_t num[8], denom[8], tmp[8];
	uint32_t secret[8] = {0}, basis_poly[8] = {0};

	/* Collect the x and y values */
	for (share_idx = 0; share_idx < k; share_idx++) {
		bitslice_setall(xs[share_idx], key_shares[share_idx].x);
		bitslice(ys[share_idx], key_shares[share_idx].y);
	}

	/* Use Lagrange basis polynomials to calculate the secret coefficient */
	for (idx1 = 0; idx1 < k; idx1++) {
		/* Both products start at 1 in every lane */
		memset(num, 0, sizeof(num));
		memset(denom, 0, sizeof(denom));
		num[0] = ~(uint32_t) 0;
		denom[0] = ~(uint32_t) 0;
		for (idx2 = 0; idx2 < k; idx2++) {
			if (idx1 == idx2) continue;
			/* num *= x_j; denom *= (x_i + x_j). Addition and
			 * subtraction coincide in GF(2^8). */
			gf256_mul(num, num, xs[idx2]);
			memcpy(tmp, xs[idx1], sizeof(uint32_t[8]));
			gf256_add(tmp, xs[idx2]);
			gf256_mul(denom, denom, tmp);
		}
		/* basis_poly = num / denom */
		gf256_inv(tmp, denom);
		gf256_mul(basis_poly, num, tmp);
		/* Add scaled polynomial coefficient to restored secret */
		gf256_mul(tmp, ys[idx1], basis_poly);
		gf256_add(secret, tmp);
	}
	unbitslice(key, secret);
}