fix(csprng): enable target_feature attributes for functions using simd intrinsics

This commit is contained in:
sarah el kazdadi
2023-09-18 11:43:44 +02:00
committed by sarah
parent 6cdd41c22f
commit 30a5ade17f
3 changed files with 60 additions and 43 deletions

View File

@@ -35,17 +35,23 @@ impl AesBlockCipher for ArmAesBlockCipher {
}
fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
let mut output = [0u8; BYTES_PER_BATCH];
// We want 128 bytes of output, the ctr gives 128 bit message (16 bytes)
for (i, out) in output.chunks_exact_mut(16).enumerate() {
let encrypted = unsafe {
#[target_feature(enable = "aes,neon")]
unsafe fn implementation(
this: &ArmAesBlockCipher,
AesIndex(aes_ctr): AesIndex,
) -> [u8; BYTES_PER_BATCH] {
let mut output = [0u8; BYTES_PER_BATCH];
// We want 128 bytes of output, the ctr gives 128 bit message (16 bytes)
for (i, out) in output.chunks_exact_mut(16).enumerate() {
// Safe because we prevent the user from creating the Generator
// on non-supported hardware
encrypt(aes_ctr + (i as u128), &self.round_keys)
};
out.copy_from_slice(&encrypted.to_ne_bytes());
let encrypted = encrypt(aes_ctr + (i as u128), &this.round_keys);
out.copy_from_slice(&encrypted.to_ne_bytes());
}
output
}
output
// SAFETY: we checked for aes and neon availability in `Self::new`
unsafe { implementation(self, AesIndex(aes_ctr)) }
}
}
@@ -55,6 +61,7 @@ impl AesBlockCipher for ArmAesBlockCipher {
///
/// You must make sure the CPU's arch is `aarch64` and has
/// `neon` and `aes` features.
#[inline(always)]
unsafe fn sub_word(word: u32) -> u32 {
let data = vreinterpretq_u8_u32(vdupq_n_u32(word));
let zero_key = vdupq_n_u8(0u8);
@@ -68,14 +75,17 @@ unsafe fn sub_word(word: u32) -> u32 {
vgetq_lane_u32::<0>(vreinterpretq_u32_u8(temp))
}
/// Reinterprets the 16 bytes of a NEON `uint8x16_t` vector as a `u128`.
///
/// This is a bit-for-bit copy: no lane is reordered or converted.
#[inline(always)]
fn uint8x16_t_to_u128(input: uint8x16_t) -> u128 {
    // SAFETY: source and destination are both 16 bytes wide (`transmute` checks this at
    // compile time) and every bit pattern is a valid `u128`.
    unsafe { transmute::<uint8x16_t, u128>(input) }
}
/// Reinterprets the 16 bytes of a `u128` as a NEON `uint8x16_t` vector.
///
/// This is a bit-for-bit copy: the bytes keep their native in-memory order.
#[inline(always)]
fn u128_to_uint8x16_t(input: u128) -> uint8x16_t {
    // SAFETY: source and destination are both 16 bytes wide (`transmute` checks this at
    // compile time) and every bit pattern is a valid `uint8x16_t`.
    unsafe { transmute::<u128, uint8x16_t>(input) }
}
#[target_feature(enable = "aes,neon")]
unsafe fn generate_round_keys(key: AesKey) -> [uint8x16_t; NUM_ROUND_KEYS] {
let mut round_keys: [uint8x16_t; NUM_ROUND_KEYS] = std::mem::zeroed();
round_keys[0] = u128_to_uint8x16_t(key.0);
@@ -109,6 +119,7 @@ unsafe fn generate_round_keys(key: AesKey) -> [uint8x16_t; NUM_ROUND_KEYS] {
///
/// You must make sure the CPU's arch is `aarch64` and has
/// `neon` and `aes` features.
#[inline(always)]
unsafe fn encrypt(message: u128, keys: &[uint8x16_t; NUM_ROUND_KEYS]) -> u128 {
// Notes:
// According to the [ARM Manual](https://developer.arm.com/documentation/ddi0487/gb/):

View File

@@ -1,7 +1,7 @@
use crate::generators::aes_ctr::{AesBlockCipher, AesIndex, AesKey, BYTES_PER_BATCH};
use std::arch::x86_64::{
__m128i, _mm_aesenc_si128, _mm_aesenclast_si128, _mm_aeskeygenassist_si128, _mm_load_si128,
_mm_shuffle_epi32, _mm_slli_si128, _mm_store_si128, _mm_xor_si128,
__m128i, _mm_aesenc_si128, _mm_aesenclast_si128, _mm_aeskeygenassist_si128, _mm_shuffle_epi32,
_mm_slli_si128, _mm_store_si128, _mm_xor_si128,
};
use std::mem::transmute;
@@ -25,26 +25,36 @@ impl AesBlockCipher for AesniBlockCipher {
)
}
let round_keys = generate_round_keys(key);
// SAFETY: we checked for aes and sse2 availability
let round_keys = unsafe { generate_round_keys(key) };
AesniBlockCipher { round_keys }
}
fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
si128arr_to_u8arr(aes_encrypt_many(
&u128_to_si128(aes_ctr),
&u128_to_si128(aes_ctr + 1),
&u128_to_si128(aes_ctr + 2),
&u128_to_si128(aes_ctr + 3),
&u128_to_si128(aes_ctr + 4),
&u128_to_si128(aes_ctr + 5),
&u128_to_si128(aes_ctr + 6),
&u128_to_si128(aes_ctr + 7),
&self.round_keys,
))
#[target_feature(enable = "sse2,aes")]
unsafe fn implementation(
this: &AesniBlockCipher,
AesIndex(aes_ctr): AesIndex,
) -> [u8; BYTES_PER_BATCH] {
si128arr_to_u8arr(aes_encrypt_many(
u128_to_si128(aes_ctr),
u128_to_si128(aes_ctr + 1),
u128_to_si128(aes_ctr + 2),
u128_to_si128(aes_ctr + 3),
u128_to_si128(aes_ctr + 4),
u128_to_si128(aes_ctr + 5),
u128_to_si128(aes_ctr + 6),
u128_to_si128(aes_ctr + 7),
&this.round_keys,
))
}
// SAFETY: we checked for aes and sse2 availability in `Self::new`
unsafe { implementation(self, AesIndex(aes_ctr)) }
}
}
fn generate_round_keys(key: AesKey) -> [__m128i; 11] {
#[target_feature(enable = "sse2,aes")]
unsafe fn generate_round_keys(key: AesKey) -> [__m128i; 11] {
let key = u128_to_si128(key.0);
let mut keys: [__m128i; 11] = [u128_to_si128(0); 11];
aes_128_key_expansion(key, &mut keys);
@@ -54,27 +64,19 @@ fn generate_round_keys(key: AesKey) -> [__m128i; 11] {
// Uses aes to encrypt many values at once. This allows a substantial speedup (around 30%)
// compared to the naive approach.
#[allow(clippy::too_many_arguments)]
#[inline(always)]
fn aes_encrypt_many(
message_1: &__m128i,
message_2: &__m128i,
message_3: &__m128i,
message_4: &__m128i,
message_5: &__m128i,
message_6: &__m128i,
message_7: &__m128i,
message_8: &__m128i,
message_1: __m128i,
message_2: __m128i,
message_3: __m128i,
message_4: __m128i,
message_5: __m128i,
message_6: __m128i,
message_7: __m128i,
message_8: __m128i,
keys: &[__m128i; 11],
) -> [__m128i; 8] {
unsafe {
let message_1 = _mm_load_si128(message_1 as *const __m128i);
let message_2 = _mm_load_si128(message_2 as *const __m128i);
let message_3 = _mm_load_si128(message_3 as *const __m128i);
let message_4 = _mm_load_si128(message_4 as *const __m128i);
let message_5 = _mm_load_si128(message_5 as *const __m128i);
let message_6 = _mm_load_si128(message_6 as *const __m128i);
let message_7 = _mm_load_si128(message_7 as *const __m128i);
let message_8 = _mm_load_si128(message_8 as *const __m128i);
let mut tmp_1 = _mm_xor_si128(message_1, keys[0]);
let mut tmp_2 = _mm_xor_si128(message_2, keys[0]);
let mut tmp_3 = _mm_xor_si128(message_3, keys[0]);
@@ -125,6 +127,7 @@ fn aes_128_assist(temp1: __m128i, temp2: __m128i) -> __m128i {
temp1
}
#[inline(always)]
fn aes_128_key_expansion(key: __m128i, keys: &mut [__m128i; 11]) {
let (mut temp1, mut temp2): (__m128i, __m128i);
temp1 = key;
@@ -163,6 +166,7 @@ fn aes_128_key_expansion(key: __m128i, keys: &mut [__m128i; 11]) {
}
}
/// Reinterprets the 16 bytes of a `u128` as an SSE `__m128i` vector.
///
/// This is a bit-for-bit copy: the bytes keep their native in-memory order.
#[inline(always)]
fn u128_to_si128(input: u128) -> __m128i {
    // SAFETY: source and destination are both 16 bytes wide (`transmute` checks this at
    // compile time) and every bit pattern is a valid `__m128i`.
    unsafe { transmute::<u128, __m128i>(input) }
}
@@ -172,6 +176,7 @@ fn si128_to_u128(input: __m128i) -> u128 {
unsafe { transmute(input) }
}
#[inline(always)]
fn si128arr_to_u8arr(input: [__m128i; 8]) -> [u8; BYTES_PER_BATCH] {
unsafe { transmute(input) }
}
@@ -217,7 +222,7 @@ mod test {
let mut keys: [__m128i; 11] = [u128_to_si128(0); 11];
aes_128_key_expansion(key, &mut keys);
let ciphertexts = aes_encrypt_many(
&message, &message, &message, &message, &message, &message, &message, &message, &keys,
message, message, message, message, message, message, message, message, &keys,
);
for ct in &ciphertexts {
assert_eq!(CIPHERTEXT, si128_to_u128(*ct));

View File

@@ -8,7 +8,7 @@ pub struct RdseedSeeder;
impl Seeder for RdseedSeeder {
fn seed(&mut self) -> Seed {
Seed(rdseed_random_m128())
Seed(unsafe { rdseed_random_m128() })
}
fn is_available() -> bool {
@@ -17,7 +17,8 @@ impl Seeder for RdseedSeeder {
}
// Generates a random 128-bit value from rdseed
fn rdseed_random_m128() -> u128 {
#[target_feature(enable = "rdseed")]
unsafe fn rdseed_random_m128() -> u128 {
let mut rand1: u64 = 0;
let mut rand2: u64 = 0;
let mut output_bytes = [0u8; 16];