mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-08 22:28:01 -05:00
refactor(gpu): AES 64
This commit is contained in:
@@ -20,7 +20,7 @@ template <typename Torus> struct int_aes_lut_buffers {
|
||||
bool allocate_gpu_memory, uint32_t num_aes_inputs,
|
||||
uint32_t sbox_parallelism, uint64_t &size_tracker) {
|
||||
|
||||
constexpr uint32_t AES_STATE_BITS = 128;
|
||||
constexpr uint32_t AES_STATE_BITS = 64;
|
||||
constexpr uint32_t SBOX_MAX_AND_GATES = 18;
|
||||
|
||||
this->and_lut = new int_radix_lut<Torus>(
|
||||
@@ -238,7 +238,7 @@ template <typename Torus> struct int_aes_main_workspaces {
|
||||
bool allocate_gpu_memory, uint32_t num_aes_inputs,
|
||||
uint32_t sbox_parallelism, uint64_t &size_tracker) {
|
||||
|
||||
constexpr uint32_t AES_STATE_BITS = 128;
|
||||
constexpr uint32_t AES_STATE_BITS = 64;
|
||||
constexpr uint32_t SBOX_MAX_AND_GATES = 18;
|
||||
constexpr uint32_t BATCH_BUFFER_OPERANDS = 3;
|
||||
|
||||
@@ -397,7 +397,7 @@ template <typename Torus> struct int_key_expansion_buffer {
|
||||
this->params = params;
|
||||
this->allocate_gpu_memory = allocate_gpu_memory;
|
||||
|
||||
constexpr uint32_t TOTAL_WORDS = 44;
|
||||
constexpr uint32_t TOTAL_WORDS = 22;
|
||||
constexpr uint32_t BITS_PER_WORD = 32;
|
||||
constexpr uint32_t TOTAL_BITS = TOTAL_WORDS * BITS_PER_WORD;
|
||||
|
||||
|
||||
@@ -621,7 +621,7 @@ __host__ void vectorized_shift_rows(CudaStreams streams,
|
||||
CudaRadixCiphertextFFI *state_bitsliced,
|
||||
uint32_t num_aes_inputs,
|
||||
int_aes_encrypt_buffer<Torus> *mem) {
|
||||
constexpr uint32_t NUM_BYTES = 16;
|
||||
constexpr uint32_t NUM_BYTES = 8;
|
||||
constexpr uint32_t LEN_BYTE = 8;
|
||||
constexpr uint32_t NUM_BITS = NUM_BYTES * LEN_BYTE;
|
||||
|
||||
@@ -649,8 +649,7 @@ __host__ void vectorized_shift_rows(CudaStreams streams,
|
||||
i * num_aes_inputs, (i + 1) * num_aes_inputs);
|
||||
}
|
||||
|
||||
const int shift_rows_map[] = {0, 5, 10, 15, 4, 9, 14, 3,
|
||||
8, 13, 2, 7, 12, 1, 6, 11};
|
||||
const int shift_rows_map[] = {0, 1, 3, 2, 4, 5, 7, 6};
|
||||
|
||||
for (int i = 0; i < NUM_BYTES; i++) {
|
||||
for (int bit = 0; bit < LEN_BYTE; bit++) {
|
||||
@@ -711,7 +710,7 @@ __host__ void vectorized_mix_columns(CudaStreams streams,
|
||||
|
||||
constexpr uint32_t BITS_PER_BYTE = 8;
|
||||
constexpr uint32_t BYTES_PER_COLUMN = 4;
|
||||
constexpr uint32_t NUM_COLUMNS = 4;
|
||||
constexpr uint32_t NUM_COLUMNS = 2;
|
||||
constexpr uint32_t BITS_PER_COLUMN = BYTES_PER_COLUMN * BITS_PER_BYTE;
|
||||
|
||||
for (uint32_t col = 0; col < NUM_COLUMNS; ++col) {
|
||||
@@ -849,7 +848,7 @@ __host__ void vectorized_aes_encrypt_inplace(
|
||||
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
|
||||
|
||||
constexpr uint32_t BITS_PER_BYTE = 8;
|
||||
constexpr uint32_t STATE_BYTES = 16;
|
||||
constexpr uint32_t STATE_BYTES = 8;
|
||||
constexpr uint32_t STATE_BITS = STATE_BYTES * BITS_PER_BYTE;
|
||||
constexpr uint32_t ROUNDS = 10;
|
||||
|
||||
@@ -910,6 +909,7 @@ __host__ void vectorized_aes_encrypt_inplace(
|
||||
mem, bsks, ksks);
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
case 8:
|
||||
for (uint32_t i = 0; i < STATE_BYTES; i += 8) {
|
||||
CudaRadixCiphertextFFI *sbox_inputs[] = {
|
||||
@@ -921,19 +921,6 @@ __host__ void vectorized_aes_encrypt_inplace(
|
||||
mem, bsks, ksks);
|
||||
}
|
||||
break;
|
||||
case 16: {
|
||||
CudaRadixCiphertextFFI *sbox_inputs[] = {
|
||||
&s_bits[0 * BITS_PER_BYTE], &s_bits[1 * BITS_PER_BYTE],
|
||||
&s_bits[2 * BITS_PER_BYTE], &s_bits[3 * BITS_PER_BYTE],
|
||||
&s_bits[4 * BITS_PER_BYTE], &s_bits[5 * BITS_PER_BYTE],
|
||||
&s_bits[6 * BITS_PER_BYTE], &s_bits[7 * BITS_PER_BYTE],
|
||||
&s_bits[8 * BITS_PER_BYTE], &s_bits[9 * BITS_PER_BYTE],
|
||||
&s_bits[10 * BITS_PER_BYTE], &s_bits[11 * BITS_PER_BYTE],
|
||||
&s_bits[12 * BITS_PER_BYTE], &s_bits[13 * BITS_PER_BYTE],
|
||||
&s_bits[14 * BITS_PER_BYTE], &s_bits[15 * BITS_PER_BYTE]};
|
||||
vectorized_sbox_n_bytes<Torus>(streams, sbox_inputs, 16, num_aes_inputs,
|
||||
mem, bsks, ksks);
|
||||
} break;
|
||||
default:
|
||||
PANIC("Unsupported S-Box parallelism level selected: %u",
|
||||
sbox_parallelism);
|
||||
@@ -993,7 +980,7 @@ __host__ void vectorized_aes_full_adder_inplace(
|
||||
const Torus *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
|
||||
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
|
||||
|
||||
constexpr uint32_t NUM_BITS = 128;
|
||||
constexpr uint32_t NUM_BITS = 64;
|
||||
|
||||
// --- Initialization ---
|
||||
CudaRadixCiphertextFFI *carry_vec =
|
||||
@@ -1098,7 +1085,7 @@ __host__ void host_integer_aes_ctr_encrypt(
|
||||
const Torus *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
|
||||
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
|
||||
|
||||
constexpr uint32_t NUM_BITS = 128;
|
||||
constexpr uint32_t NUM_BITS = 64;
|
||||
|
||||
CudaRadixCiphertextFFI *initial_states =
|
||||
mem->main_workspaces->initial_states_and_jit_key_workspace;
|
||||
@@ -1159,8 +1146,8 @@ __host__ void host_integer_key_expansion(CudaStreams streams,
|
||||
constexpr uint32_t BITS_PER_WORD = 32;
|
||||
constexpr uint32_t BITS_PER_BYTE = 8;
|
||||
constexpr uint32_t BYTES_PER_WORD = 4;
|
||||
constexpr uint32_t TOTAL_WORDS = 44;
|
||||
constexpr uint32_t KEY_WORDS = 4;
|
||||
constexpr uint32_t TOTAL_WORDS = 22;
|
||||
constexpr uint32_t KEY_WORDS = 2;
|
||||
|
||||
const Torus rcon[] = {0x01, 0x02, 0x04, 0x08, 0x10,
|
||||
0x20, 0x40, 0x80, 0x1b, 0x36};
|
||||
@@ -1178,8 +1165,8 @@ __host__ void host_integer_key_expansion(CudaStreams streams,
|
||||
|
||||
as_radix_ciphertext_slice<Torus>(&tmp_word_buffer, mem->tmp_word_buffer, 0,
|
||||
BITS_PER_WORD);
|
||||
as_radix_ciphertext_slice<Torus>(&tmp_far, words, (w - 4) * BITS_PER_WORD,
|
||||
(w - 3) * BITS_PER_WORD);
|
||||
as_radix_ciphertext_slice<Torus>(&tmp_far, words, (w - 2) * BITS_PER_WORD,
|
||||
(w - 1) * BITS_PER_WORD);
|
||||
as_radix_ciphertext_slice<Torus>(&tmp_near, words, (w - 1) * BITS_PER_WORD,
|
||||
w * BITS_PER_WORD);
|
||||
|
||||
|
||||
@@ -23,9 +23,9 @@ pub mod cuda {
|
||||
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let atomic_param: AtomicPatternParameters = param.into();
|
||||
|
||||
let key: u128 = 0x2b7e151628aed2a6abf7158809cf4f3c;
|
||||
let iv: u128 = 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff;
|
||||
let aes_op_bit_size = 128;
|
||||
let key: u64 = 0x2b7e151628aed2a6;
|
||||
let iv: u64 = 0xf0f1f2f3f4f5f6f7;
|
||||
let aes_op_bit_size = 64;
|
||||
|
||||
let param_name = param.name();
|
||||
|
||||
@@ -36,14 +36,14 @@ pub mod cuda {
|
||||
let sks = CudaServerKey::new(&cpu_cks, &streams);
|
||||
let cks = RadixClientKey::from((cpu_cks, 1));
|
||||
|
||||
let ct_key = cks.encrypt_u128_for_aes_ctr(key);
|
||||
let ct_iv = cks.encrypt_u128_for_aes_ctr(iv);
|
||||
let ct_key = cks.encrypt_u64_for_aes_ctr(key);
|
||||
let ct_iv = cks.encrypt_u64_for_aes_ctr(iv);
|
||||
|
||||
let d_key = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_key, &streams);
|
||||
let d_iv = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_iv, &streams);
|
||||
|
||||
{
|
||||
const NUM_AES_INPUTS: usize = 1;
|
||||
const NUM_AES_INPUTS: usize = 2;
|
||||
const SBOX_PARALLELISM: usize = 16;
|
||||
let bench_id = format!("{param_name}::{NUM_AES_INPUTS}_input_encryption");
|
||||
|
||||
@@ -105,8 +105,8 @@ pub mod cuda {
|
||||
|
||||
bench_group.throughput(Throughput::Elements(NUM_AES_INPUTS as u64));
|
||||
|
||||
let ct_key = cks.encrypt_u128_for_aes_ctr(key);
|
||||
let ct_iv = cks.encrypt_u128_for_aes_ctr(iv);
|
||||
let ct_key = cks.encrypt_u64_for_aes_ctr(key);
|
||||
let ct_iv = cks.encrypt_u64_for_aes_ctr(iv);
|
||||
|
||||
let d_key = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_key, &streams);
|
||||
let d_iv = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_iv, &streams);
|
||||
|
||||
@@ -7490,7 +7490,7 @@ pub(crate) unsafe fn cuda_backend_unchecked_aes_ctr_encrypt<T: UnsignedInteger,
|
||||
output: &mut CudaRadixCiphertext,
|
||||
iv: &CudaRadixCiphertext,
|
||||
round_keys: &CudaRadixCiphertext,
|
||||
start_counter: u128,
|
||||
start_counter: u64,
|
||||
num_aes_inputs: u32,
|
||||
sbox_parallelism: u32,
|
||||
bootstrapping_key: &CudaVec<B>,
|
||||
@@ -7534,8 +7534,8 @@ pub(crate) unsafe fn cuda_backend_unchecked_aes_ctr_encrypt<T: UnsignedInteger,
|
||||
|
||||
let counter_bits_le: Vec<u64> = (0..num_aes_inputs)
|
||||
.flat_map(|i| {
|
||||
let current_counter = start_counter + i as u128;
|
||||
(0..128).map(move |bit_index| ((current_counter >> bit_index) & 1) as u64)
|
||||
let current_counter = start_counter + i as u64;
|
||||
(0..64).map(move |bit_index| (current_counter >> bit_index) & 1)
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
||||
@@ -13,88 +13,32 @@ use crate::integer::gpu::{
|
||||
use crate::integer::{RadixCiphertext, RadixClientKey};
|
||||
use crate::shortint::Ciphertext;
|
||||
|
||||
const NUM_BITS: usize = 128;
|
||||
const NUM_BITS: usize = 64;
|
||||
|
||||
impl RadixClientKey {
|
||||
/// Encrypts a 128-bit block for homomorphic AES evaluation.
|
||||
///
|
||||
/// This function prepares a 128-bit plaintext block (like an AES key or IV)
|
||||
/// for homomorphic processing by decomposing it into its 128 constituent bits
|
||||
/// and encrypting each bit individually with FHE.
|
||||
///
|
||||
/// The process is as follows:
|
||||
/// ```text
|
||||
/// // INPUT: A 128-bit plaintext block
|
||||
/// Plaintext block (u128): 0x2b7e1516...
|
||||
/// |
|
||||
/// V
|
||||
/// // 1. Decompose the block into individual bits
|
||||
/// Individual bits: [b127, b126, ..., b1, b0]
|
||||
/// |
|
||||
/// V
|
||||
/// // 2. Encrypt each bit individually using FHE
|
||||
/// `self.encrypt(bit)` is applied to each bit
|
||||
/// |
|
||||
/// V
|
||||
/// // 3. Collect the resulting bit-ciphertexts
|
||||
/// Ciphertexts: [Ct(b127), Ct(b126), ..., Ct(b0)]
|
||||
/// |
|
||||
/// V
|
||||
/// // 4. Group the bit-ciphertexts into a single RadixCiphertext
|
||||
/// // representing the full encrypted block.
|
||||
/// // OUTPUT: A RadixCiphertext
|
||||
/// ```
|
||||
pub fn encrypt_u128_for_aes_ctr(&self, data: u128) -> RadixCiphertext {
|
||||
pub fn encrypt_u64_for_aes_ctr(&self, data: u64) -> RadixCiphertext {
|
||||
let mut blocks: Vec<Ciphertext> = Vec::with_capacity(NUM_BITS);
|
||||
for i in 0..NUM_BITS {
|
||||
let bit = ((data >> (NUM_BITS - 1 - i)) & 1) as u64;
|
||||
let bit = (data >> (NUM_BITS - 1 - i)) & 1;
|
||||
blocks.extend(self.encrypt(bit).blocks);
|
||||
}
|
||||
RadixCiphertext::from(blocks)
|
||||
}
|
||||
|
||||
/// Decrypts a `RadixCiphertext` containing one or more 128-bit blocks
|
||||
/// that were homomorphically processed.
|
||||
///
|
||||
/// This function reverses the encryption process by decrypting each individual
|
||||
/// bit-ciphertext and reassembling them into 128-bit plaintext blocks.
|
||||
///
|
||||
/// The process is as follows:
|
||||
/// ```text
|
||||
/// // INPUT: RadixCiphertext containing one or more encrypted blocks
|
||||
/// Ciphertext collection: [Ct(b127), ..., Ct(b0), Ct(b'127), ..., Ct(b'0), ...]
|
||||
/// |
|
||||
/// | (For each sequence of 128 bit-ciphertexts)
|
||||
/// V
|
||||
/// // 1. Decrypt each bit's ciphertext individually
|
||||
/// `self.decrypt(Ct)` is applied to each bit-ciphertext
|
||||
/// |
|
||||
/// V
|
||||
/// // 2. Collect the resulting plaintext bits
|
||||
/// Plaintext bits: [b127, b126, ..., b0]
|
||||
/// |
|
||||
/// V
|
||||
/// // 3. Assemble the bits back into a 128-bit block
|
||||
/// Reconstruction: ( ...((b127 << 1) | b126) << 1 | ... ) | b0
|
||||
/// |
|
||||
/// V
|
||||
/// // OUTPUT: A vector of plaintext u128 blocks
|
||||
/// Plaintext u128s: [0x..., ...]
|
||||
/// ```
|
||||
pub fn decrypt_u128_from_aes_ctr(
|
||||
pub fn decrypt_u64_from_aes_ctr(
|
||||
&self,
|
||||
encrypted_result: &RadixCiphertext,
|
||||
num_aes_inputs: usize,
|
||||
) -> Vec<u128> {
|
||||
) -> Vec<u64> {
|
||||
let mut plaintext_results = Vec::with_capacity(num_aes_inputs);
|
||||
for i in 0..num_aes_inputs {
|
||||
let mut current_block_plaintext: u128 = 0;
|
||||
let mut current_block_plaintext: u64 = 0;
|
||||
let block_start_index = i * NUM_BITS;
|
||||
for j in 0..NUM_BITS {
|
||||
let block_slice =
|
||||
&encrypted_result.blocks[block_start_index + j..block_start_index + j + 1];
|
||||
let block_radix_ct = RadixCiphertext::from(block_slice.to_vec());
|
||||
let decrypted_bit: u128 = self.decrypt(&block_radix_ct);
|
||||
let decrypted_bit: u64 = self.decrypt(&block_radix_ct);
|
||||
current_block_plaintext = (current_block_plaintext << 1) | decrypted_bit;
|
||||
}
|
||||
plaintext_results.push(current_block_plaintext);
|
||||
@@ -108,7 +52,7 @@ impl CudaServerKey {
|
||||
&self,
|
||||
key: &CudaUnsignedRadixCiphertext,
|
||||
iv: &CudaUnsignedRadixCiphertext,
|
||||
start_counter: u128,
|
||||
start_counter: u64,
|
||||
num_aes_inputs: usize,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext {
|
||||
@@ -154,7 +98,7 @@ impl CudaServerKey {
|
||||
&self,
|
||||
key: &CudaUnsignedRadixCiphertext,
|
||||
iv: &CudaUnsignedRadixCiphertext,
|
||||
start_counter: u128,
|
||||
start_counter: u64,
|
||||
num_aes_inputs: usize,
|
||||
sbox_parallelism: usize,
|
||||
streams: &CudaStreams,
|
||||
@@ -188,13 +132,13 @@ impl CudaServerKey {
|
||||
&self,
|
||||
iv: &CudaUnsignedRadixCiphertext,
|
||||
round_keys: &CudaUnsignedRadixCiphertext,
|
||||
start_counter: u128,
|
||||
start_counter: u64,
|
||||
num_aes_inputs: usize,
|
||||
sbox_parallelism: usize,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext {
|
||||
let mut result: CudaUnsignedRadixCiphertext =
|
||||
self.create_trivial_zero_radix(num_aes_inputs * 128, streams);
|
||||
self.create_trivial_zero_radix(num_aes_inputs * NUM_BITS, streams);
|
||||
|
||||
let num_round_key_blocks = 11 * NUM_BITS;
|
||||
|
||||
@@ -212,9 +156,9 @@ impl CudaServerKey {
|
||||
);
|
||||
assert_eq!(
|
||||
result.as_ref().d_blocks.lwe_ciphertext_count().0,
|
||||
num_aes_inputs * 128,
|
||||
num_aes_inputs * NUM_BITS,
|
||||
"AES result must contain {} encrypted bits for {num_aes_inputs} blocks, but contains {}",
|
||||
num_aes_inputs * 128,
|
||||
num_aes_inputs * NUM_BITS,
|
||||
result.as_ref().d_blocks.lwe_ciphertext_count().0
|
||||
);
|
||||
|
||||
@@ -327,7 +271,7 @@ impl CudaServerKey {
|
||||
streams: &CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext {
|
||||
let num_round_keys = 11;
|
||||
let num_key_bits = 128;
|
||||
let num_key_bits = 64;
|
||||
let mut expanded_keys: CudaUnsignedRadixCiphertext =
|
||||
self.create_trivial_zero_radix(num_round_keys * num_key_bits, streams);
|
||||
|
||||
|
||||
@@ -85,16 +85,14 @@ impl<F> GpuFunctionExecutor<F> {
|
||||
}
|
||||
|
||||
impl<'a, F>
|
||||
FunctionExecutor<
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u128, usize, usize),
|
||||
RadixCiphertext,
|
||||
> for GpuFunctionExecutor<F>
|
||||
FunctionExecutor<(&'a RadixCiphertext, &'a RadixCiphertext, u64, usize, usize), RadixCiphertext>
|
||||
for GpuFunctionExecutor<F>
|
||||
where
|
||||
F: Fn(
|
||||
&CudaServerKey,
|
||||
&CudaUnsignedRadixCiphertext,
|
||||
&CudaUnsignedRadixCiphertext,
|
||||
u128,
|
||||
u64,
|
||||
usize,
|
||||
usize,
|
||||
&CudaStreams,
|
||||
@@ -106,7 +104,7 @@ where
|
||||
|
||||
fn execute(
|
||||
&mut self,
|
||||
input: (&'a RadixCiphertext, &'a RadixCiphertext, u128, usize, usize),
|
||||
input: (&'a RadixCiphertext, &'a RadixCiphertext, u64, usize, usize),
|
||||
) -> RadixCiphertext {
|
||||
let context = self
|
||||
.context
|
||||
@@ -133,14 +131,14 @@ where
|
||||
}
|
||||
|
||||
impl<'a, F>
|
||||
FunctionExecutor<(&'a RadixCiphertext, &'a RadixCiphertext, u128, usize), RadixCiphertext>
|
||||
FunctionExecutor<(&'a RadixCiphertext, &'a RadixCiphertext, u64, usize), RadixCiphertext>
|
||||
for GpuFunctionExecutor<F>
|
||||
where
|
||||
F: Fn(
|
||||
&CudaServerKey,
|
||||
&CudaUnsignedRadixCiphertext,
|
||||
&CudaUnsignedRadixCiphertext,
|
||||
u128,
|
||||
u64,
|
||||
usize,
|
||||
&CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext,
|
||||
@@ -151,7 +149,7 @@ where
|
||||
|
||||
fn execute(
|
||||
&mut self,
|
||||
input: (&'a RadixCiphertext, &'a RadixCiphertext, u128, usize),
|
||||
input: (&'a RadixCiphertext, &'a RadixCiphertext, u64, usize),
|
||||
) -> RadixCiphertext {
|
||||
let context = self
|
||||
.context
|
||||
|
||||
@@ -25,57 +25,54 @@ const S_BOX: [u8; 256] = [
|
||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
|
||||
];
|
||||
|
||||
fn plain_key_expansion(key: u128) -> Vec<u128> {
|
||||
fn plain_key_expansion(key: u64) -> Vec<u64> {
|
||||
const RCON: [u32; 10] = [
|
||||
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000,
|
||||
0x80000000, 0x1B000000, 0x36000000,
|
||||
];
|
||||
let mut words = [0u32; 44];
|
||||
for (i, word) in words.iter_mut().enumerate().take(4) {
|
||||
*word = (key >> (96 - (i * 32))) as u32;
|
||||
// 64-bit key (2 words) * 11 rounds = 22 words
|
||||
const KEY_WORDS: usize = 2;
|
||||
const TOTAL_WORDS: usize = 22;
|
||||
|
||||
let mut words = [0u32; TOTAL_WORDS];
|
||||
for (i, word) in words.iter_mut().enumerate().take(KEY_WORDS) {
|
||||
*word = (key >> (32 - (i * 32))) as u32;
|
||||
}
|
||||
for i in 4..44 {
|
||||
|
||||
for i in KEY_WORDS..TOTAL_WORDS {
|
||||
let mut temp = words[i - 1];
|
||||
if i % 4 == 0 {
|
||||
if i % KEY_WORDS == 0 {
|
||||
temp = temp.rotate_left(8);
|
||||
let mut sub_bytes = 0u32;
|
||||
for j in 0..4 {
|
||||
let byte = (temp >> (24 - j * 8)) as u8;
|
||||
sub_bytes |= (S_BOX[byte as usize] as u32) << (24 - j * 8);
|
||||
}
|
||||
temp = sub_bytes ^ RCON[i / 4 - 1];
|
||||
temp = sub_bytes ^ RCON[i / KEY_WORDS - 1];
|
||||
}
|
||||
words[i] = words[i - 4] ^ temp;
|
||||
words[i] = words[i - KEY_WORDS] ^ temp;
|
||||
}
|
||||
words
|
||||
.chunks_exact(4)
|
||||
.map(|chunk| {
|
||||
((chunk[0] as u128) << 96)
|
||||
| ((chunk[1] as u128) << 64)
|
||||
| ((chunk[2] as u128) << 32)
|
||||
| (chunk[3] as u128)
|
||||
})
|
||||
.chunks_exact(KEY_WORDS)
|
||||
.map(|chunk| ((chunk[0] as u64) << 32) | (chunk[1] as u64))
|
||||
.collect()
|
||||
}
|
||||
fn sub_bytes(state: &mut [u8; 16]) {
|
||||
fn sub_bytes(state: &mut [u8; 8]) {
|
||||
for byte in state.iter_mut() {
|
||||
*byte = S_BOX[*byte as usize];
|
||||
}
|
||||
}
|
||||
fn shift_rows(state: &mut [u8; 16]) {
|
||||
fn shift_rows(state: &mut [u8; 8]) {
|
||||
// 4x2 state
|
||||
// Row 0: s0, s1 (no shift)
|
||||
// Row 1: s2, s3 (shift 1)
|
||||
// Row 2: s4, s5 (shift 2 -> no shift)
|
||||
// Row 3: s6, s7 (shift 3 -> shift 1)
|
||||
let original = *state;
|
||||
state[1] = original[5];
|
||||
state[5] = original[9];
|
||||
state[9] = original[13];
|
||||
state[13] = original[1];
|
||||
state[2] = original[10];
|
||||
state[6] = original[14];
|
||||
state[10] = original[2];
|
||||
state[14] = original[6];
|
||||
state[3] = original[15];
|
||||
state[7] = original[3];
|
||||
state[11] = original[7];
|
||||
state[15] = original[11];
|
||||
state[2] = original[3];
|
||||
state[3] = original[2];
|
||||
state[6] = original[7];
|
||||
state[7] = original[6];
|
||||
}
|
||||
fn gmul(mut a: u8, mut b: u8) -> u8 {
|
||||
let mut p = 0;
|
||||
@@ -92,9 +89,10 @@ fn gmul(mut a: u8, mut b: u8) -> u8 {
|
||||
}
|
||||
p
|
||||
}
|
||||
fn mix_columns(state: &mut [u8; 16]) {
|
||||
fn mix_columns(state: &mut [u8; 8]) {
|
||||
let original = *state;
|
||||
for i in 0..4 {
|
||||
// 2 columns
|
||||
for i in 0..2 {
|
||||
let col = i * 4;
|
||||
state[col] = gmul(original[col], 2)
|
||||
^ gmul(original[col + 1], 3)
|
||||
@@ -114,13 +112,13 @@ fn mix_columns(state: &mut [u8; 16]) {
|
||||
^ gmul(original[col + 3], 2);
|
||||
}
|
||||
}
|
||||
fn add_round_key(state: &mut [u8; 16], round_key: u128) {
|
||||
fn add_round_key(state: &mut [u8; 8], round_key: u64) {
|
||||
let key_bytes = round_key.to_be_bytes();
|
||||
for i in 0..16 {
|
||||
for i in 0..8 {
|
||||
state[i] ^= key_bytes[i];
|
||||
}
|
||||
}
|
||||
fn plain_aes_encrypt_block(block_bytes: &mut [u8; 16], expanded_keys: &[u128]) {
|
||||
fn plain_aes_encrypt_block(block_bytes: &mut [u8; 8], expanded_keys: &[u64]) {
|
||||
add_round_key(block_bytes, expanded_keys[0]);
|
||||
for round_key in expanded_keys.iter().take(10).skip(1) {
|
||||
sub_bytes(block_bytes);
|
||||
@@ -132,14 +130,14 @@ fn plain_aes_encrypt_block(block_bytes: &mut [u8; 16], expanded_keys: &[u128]) {
|
||||
shift_rows(block_bytes);
|
||||
add_round_key(block_bytes, expanded_keys[10]);
|
||||
}
|
||||
fn plain_aes_ctr(num_aes_inputs: usize, iv: u128, key: u128) -> Vec<u128> {
|
||||
fn plain_aes_ctr(num_aes_inputs: usize, iv: u64, key: u64) -> Vec<u64> {
|
||||
let expanded_keys = plain_key_expansion(key);
|
||||
let mut results = Vec::with_capacity(num_aes_inputs);
|
||||
for i in 0..num_aes_inputs {
|
||||
let counter_value = iv.wrapping_add(i as u128);
|
||||
let counter_value = iv.wrapping_add(i as u64);
|
||||
let mut block = counter_value.to_be_bytes();
|
||||
plain_aes_encrypt_block(&mut block, &expanded_keys);
|
||||
results.push(u128::from_be_bytes(block));
|
||||
results.push(u64::from_be_bytes(block));
|
||||
}
|
||||
results
|
||||
}
|
||||
@@ -148,7 +146,7 @@ fn internal_aes_fixed_parallelism_test<P, E>(param: P, mut executor: E, num_aes_
|
||||
where
|
||||
P: Into<TestParameters>,
|
||||
E: for<'a> FunctionExecutor<
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u128, usize, usize),
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u64, usize, usize),
|
||||
RadixCiphertext,
|
||||
>,
|
||||
{
|
||||
@@ -158,18 +156,18 @@ where
|
||||
let sks = Arc::new(sks);
|
||||
executor.setup(&cks, sks);
|
||||
|
||||
let key: u128 = 0x2b7e151628aed2a6abf7158809cf4f3c;
|
||||
let iv: u128 = 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff;
|
||||
let key: u64 = 0x2b7e151628aed2a6;
|
||||
let iv: u64 = 0xf0f1f2f3f4f5f6f7;
|
||||
|
||||
let plain_results = plain_aes_ctr(num_aes_inputs, iv, key);
|
||||
|
||||
let ctxt_key = cks.encrypt_u128_for_aes_ctr(key);
|
||||
let ctxt_iv = cks.encrypt_u128_for_aes_ctr(iv);
|
||||
let ctxt_key = cks.encrypt_u64_for_aes_ctr(key);
|
||||
let ctxt_iv = cks.encrypt_u64_for_aes_ctr(iv);
|
||||
|
||||
for sbox_parallelism in [1, 2, 4, 8, 16] {
|
||||
let encrypted_result =
|
||||
executor.execute((&ctxt_key, &ctxt_iv, 0, num_aes_inputs, sbox_parallelism));
|
||||
let fhe_results = cks.decrypt_u128_from_aes_ctr(&encrypted_result, num_aes_inputs);
|
||||
let fhe_results = cks.decrypt_u64_from_aes_ctr(&encrypted_result, num_aes_inputs);
|
||||
assert_eq!(fhe_results, plain_results);
|
||||
}
|
||||
}
|
||||
@@ -178,7 +176,7 @@ pub fn aes_fixed_parallelism_1_input_test<P, E>(param: P, executor: E)
|
||||
where
|
||||
P: Into<TestParameters>,
|
||||
E: for<'a> FunctionExecutor<
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u128, usize, usize),
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u64, usize, usize),
|
||||
RadixCiphertext,
|
||||
>,
|
||||
{
|
||||
@@ -189,7 +187,7 @@ pub fn aes_fixed_parallelism_2_inputs_test<P, E>(param: P, executor: E)
|
||||
where
|
||||
P: Into<TestParameters>,
|
||||
E: for<'a> FunctionExecutor<
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u128, usize, usize),
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u64, usize, usize),
|
||||
RadixCiphertext,
|
||||
>,
|
||||
{
|
||||
@@ -200,7 +198,7 @@ pub fn aes_dynamic_parallelism_many_inputs_test<P, E>(param: P, mut executor: E)
|
||||
where
|
||||
P: Into<TestParameters>,
|
||||
E: for<'a> FunctionExecutor<
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u128, usize),
|
||||
(&'a RadixCiphertext, &'a RadixCiphertext, u64, usize),
|
||||
RadixCiphertext,
|
||||
>,
|
||||
{
|
||||
@@ -210,16 +208,16 @@ where
|
||||
let sks = Arc::new(sks);
|
||||
executor.setup(&cks, sks);
|
||||
|
||||
let key: u128 = 0x2b7e151628aed2a6abf7158809cf4f3c;
|
||||
let iv: u128 = 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff;
|
||||
let key: u64 = 0x2b7e151628aed2a6;
|
||||
let iv: u64 = 0xf0f1f2f3f4f5f6f7;
|
||||
|
||||
let ctxt_key = cks.encrypt_u128_for_aes_ctr(key);
|
||||
let ctxt_iv = cks.encrypt_u128_for_aes_ctr(iv);
|
||||
let ctxt_key = cks.encrypt_u64_for_aes_ctr(key);
|
||||
let ctxt_iv = cks.encrypt_u64_for_aes_ctr(iv);
|
||||
|
||||
for num_aes_inputs in [4, 8, 16, 32] {
|
||||
let plain_results = plain_aes_ctr(num_aes_inputs, iv, key);
|
||||
let encrypted_result = executor.execute((&ctxt_key, &ctxt_iv, 0, num_aes_inputs));
|
||||
let fhe_results = cks.decrypt_u128_from_aes_ctr(&encrypted_result, num_aes_inputs);
|
||||
let fhe_results = cks.decrypt_u64_from_aes_ctr(&encrypted_result, num_aes_inputs);
|
||||
assert_eq!(fhe_results, plain_results);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user