feat(optimizer): add gpu parameter constraints

Author: Mayeul@Zama
Date: 2022-10-18 14:37:55 +02:00
Committed by: mayeul-zama
Parent: 1f15162b39
Commit: c5dad8ffdb
20 changed files with 286 additions and 175 deletions
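
Summary sketch: the commit threads a ProcessingUnit through the search space and the decomposition caches. A minimal sketch of the new call shape (module paths as they appear in the diffs below; the SM count is an arbitrary illustrative value, not taken from this commit):

use concrete_optimizer::config::{self, GpuPbsType};
use concrete_optimizer::optimization::config::SearchSpace;
use concrete_optimizer::optimization::decomposition;

fn gpu_setup_sketch() {
    // Select a GPU target; number_of_sm is hypothetical here.
    let processing_unit = config::ProcessingUnit::Gpu {
        pbs_type: GpuPbsType::Lowlat,
        number_of_sm: 108,
    };
    // GPU-aware search space, restricted to what concrete-cuda supports.
    let search_space = SearchSpace::default(processing_unit);
    // Decomposition cache is now keyed by hardware as well as security level.
    let cache = decomposition::cache(128, processing_unit, None);
    let _ = (search_space, cache);
}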

View File

@@ -1,5 +1,6 @@
use charts::{draw, Serie};
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
use concrete_optimizer::config;
use concrete_optimizer::global_parameters::DEFAUT_DOMAINS;
use concrete_optimizer::optimization::atomic_pattern::{self as optimize_atomic_pattern};
use concrete_optimizer::optimization::config::{Config, SearchSpace};
@@ -25,6 +26,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let glwe_dimensions: Vec<_> = (1..=6).collect();
let internal_lwe_dimensions: Vec<_> = (MIN_LWE_DIM..=MAX_LWE_DIM).step_by(10).collect();
let processing_unit = config::ProcessingUnit::Cpu;
let search_space = SearchSpace {
glwe_log_polynomial_sizes,
glwe_dimensions,
@@ -41,7 +44,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
complexity_model: &CpuComplexity::default(),
};
let cache = decomposition::cache(security_level);
let cache = decomposition::cache(security_level, processing_unit, None);
let solutions: Vec<_> = log_norm2s
.clone()

View File

@@ -1,5 +1,6 @@
use charts::{draw, Serie};
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
use concrete_optimizer::config;
use concrete_optimizer::global_parameters::DEFAUT_DOMAINS;
use concrete_optimizer::optimization::atomic_pattern::{self as optimize_atomic_pattern};
use concrete_optimizer::optimization::config::{Config, SearchSpace};
@@ -18,6 +19,8 @@ pub const MIN_LWE_DIM: u64 = DEFAUT_DOMAINS.free_glwe.glwe_dimension.start as u6
pub const MAX_LWE_DIM: u64 = DEFAUT_DOMAINS.free_glwe.glwe_dimension.end as u64 - 1;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let processing_unit = config::ProcessingUnit::Cpu;
let sum_size = 4096;
let p_error = _4_SIGMA;
let security_level = 128;
@@ -41,7 +44,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
complexity_model: &CpuComplexity::default(),
};
let cache = decomposition::cache(security_level);
let cache = decomposition::cache(security_level, processing_unit, None);
let solutions: Vec<_> = precisions
.clone()

View File

@@ -1,4 +1,5 @@
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
use concrete_optimizer::config;
use concrete_optimizer::dag::operator::{
self, FunctionTable, LevelledComplexity, OperatorIndex, Precision, Shape,
};
@@ -22,6 +23,8 @@ fn no_dag_solution() -> ffi::DagSolution {
}
fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options) -> ffi::Solution {
let processing_unit = config::ProcessingUnit::Cpu;
let config = Config {
security_level: options.security_level,
maximum_acceptable_error_probability: options.maximum_acceptable_error_probability,
@@ -31,7 +34,7 @@ fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options)
let sum_size = 1;
let search_space = SearchSpace::default();
let search_space = SearchSpace::default(processing_unit);
let result = concrete_optimizer::optimization::atomic_pattern::optimize_one(
sum_size,
@@ -39,7 +42,7 @@ fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options)
config,
noise_factor,
&search_space,
&decomposition::cache(options.security_level),
&decomposition::cache(options.security_level, processing_unit, None),
);
result
.best_solution
@@ -199,6 +202,7 @@ impl OperationDag {
}
fn optimize_v0(&self, options: ffi::Options) -> ffi::Solution {
let processing_unit = config::ProcessingUnit::Cpu;
let config = Config {
security_level: options.security_level,
maximum_acceptable_error_probability: options.maximum_acceptable_error_probability,
@@ -206,13 +210,13 @@ impl OperationDag {
complexity_model: &CpuComplexity::default(),
};
let search_space = SearchSpace::default();
let search_space = SearchSpace::default(processing_unit);
let result = concrete_optimizer::optimization::dag::solo_key::optimize::optimize(
&self.0,
config,
&search_space,
&decomposition::cache(options.security_level),
&decomposition::cache(options.security_level, processing_unit, None),
);
result
.best_solution
@@ -220,6 +224,7 @@ impl OperationDag {
}
fn optimize(&self, options: ffi::Options) -> ffi::DagSolution {
let processing_unit = config::ProcessingUnit::Cpu;
let config = Config {
security_level: options.security_level,
maximum_acceptable_error_probability: options.maximum_acceptable_error_probability,
@@ -227,8 +232,8 @@ impl OperationDag {
complexity_model: &CpuComplexity::default(),
};
let search_space = SearchSpace::default();
let cache = decomposition::cache(options.security_level);
let search_space = SearchSpace::default(processing_unit);
let cache = decomposition::cache(options.security_level, processing_unit, None);
let result = concrete_optimizer::optimization::dag::solo_key::optimize_generic::optimize(
&self.0,

View File

@@ -2,6 +2,7 @@ use super::complexity::Complexity;
use super::complexity_model::ComplexityModel;
use crate::parameters::AtomicPatternParameters;
#[allow(dead_code)]
pub fn atomic_pattern_complexity(
complexity_model: &dyn ComplexityModel,
sum_size: u64,

View File

@@ -4,6 +4,7 @@ use super::operators::keyswitch_lwe::KsComplexity;
use super::operators::{keyswitch_lwe, pbs};
use crate::parameters::{KeyswitchParameters, LweDimension, PbsParameters};
#[derive(Clone)]
pub struct CpuComplexity {
pub ks_lwe: keyswitch_lwe::KsComplexity,
pub pbs: pbs::PbsComplexity,

View File

@@ -1,6 +1,7 @@
use super::complexity::Complexity;
/** Standard fft complexity model */
#[derive(Clone)]
pub struct AsymptoticWithFactors {
factor_fft: f64, // factor applied on asymptotic complexity
factor_ifft: f64, // factor applied on asymptotic complexity
@@ -32,14 +33,6 @@ impl Default for AsymptoticWithFactors {
#[cfg(test)]
pub mod tests {
use crate::computing_cost::fft;
use crate::computing_cost::fft::AsymptoticWithFactors;
/** Standard fft complexity with X factors*/
pub const COST_AWS: AsymptoticWithFactors = AsymptoticWithFactors {
// https://github.com/zama-ai/concrete-optimizer/blob/prototype/python/optimizer/noise_formulas/bootstrap.py#L150
factor_fft: 0.202_926_951_153_089_17,
factor_ifft: 0.407_795_078_512_891,
};
#[test]
fn golden_python_prototype() {

View File

@@ -4,125 +4,52 @@ use crate::parameters::{KeyswitchParameters, LweDimension, PbsParameters};
use crate::utils::square;
#[derive(Clone, Copy)]
pub struct GpuPbsComplexity {
pub w1: f64,
pub w2: f64,
pub w3: f64,
pub w4: f64,
pub occupancy: f64,
}
//https://github.com/zama-ai/concrete-core-internal/issues/91
impl GpuPbsComplexity {
pub fn default_lowlat_u64(occupancy: f64) -> Self {
Self {
w1: 2_576.105_013_4,
w2: -21_631.382_229_52,
w3: -86_525.527_535_17,
w4: 0.125_472_398_538_904_43,
occupancy,
}
}
pub enum GpuPbsComplexity {
Lowlat,
Amortized,
}
#[derive(Clone, Copy)]
pub struct GpuKsComplexity {
pub w1: f64,
pub w2: f64,
pub w3: f64,
pub w4: f64,
pub occupancy: f64,
pub number_of_sm: u64,
}
// https://github.com/zama-ai/concrete-core-internal/issues/90
impl GpuKsComplexity {
pub fn default_u64(occupancy: f64, number_of_sm: u64) -> Self {
Self {
w1: 7_959.869_676_54,
w2: 3_866.817_732_87,
w3: 8_353.484_127_44,
w4: 0.125_472_398_538_904_43,
occupancy,
number_of_sm,
}
}
}
pub struct GpuKsComplexity;
#[derive(Clone, Copy)]
pub struct GpuComplexity {
pub ks: GpuKsComplexity,
pub pbs: GpuPbsComplexity,
pub ncores: u64,
pub number_of_sm: u64,
}
impl GpuComplexity {
pub fn default_lowlat_u64(number_of_sm: u64) -> Self {
Self {
ks: GpuKsComplexity,
pbs: GpuPbsComplexity::Lowlat,
number_of_sm,
}
}
pub fn default_amortized_u64(number_of_sm: u64) -> Self {
Self {
ks: GpuKsComplexity,
pbs: GpuPbsComplexity::Amortized,
number_of_sm,
}
}
}
impl ComplexityModel for GpuComplexity {
#[allow(clippy::let_and_return, non_snake_case)]
fn pbs_complexity(&self, params: PbsParameters, _ciphertext_modulus_log: u32) -> Complexity {
let GpuPbsComplexity {
w1,
w2,
w3,
w4,
occupancy,
} = self.pbs;
let n = params.internal_lwe_dimension.0 as f64;
let k = params.output_glwe_params.glwe_dimension as f64;
let N = (1 << params.output_glwe_params.log2_polynomial_size) as f64;
let ell = params.br_decomposition_parameter.level as f64;
let number_of_ct = 1.;
let number_of_operations = number_of_ct * algorithmic_complexity_pbs(n, k, N, ell);
let size = std::mem::size_of::<u64>() as f64;
let pbs_cost = w4 * number_of_operations / (self.ncores as f64 * occupancy)
+ (w1 * n * (2. + ell * N * square(k + 1.))
+ 2. * N * ell * (w2 + w3 * square(k + 1.)))
* size;
pbs_cost
fn pbs_complexity(&self, _params: PbsParameters, _ciphertext_modulus_log: u32) -> Complexity {
todo!()
}
#[allow(clippy::let_and_return)]
fn ks_complexity(
&self,
params: KeyswitchParameters,
ciphertext_modulus_log: u32,
_params: KeyswitchParameters,
_ciphertext_modulus_log: u32,
) -> Complexity {
let GpuKsComplexity {
w1,
w2,
w3,
w4,
occupancy,
number_of_sm,
} = self.ks;
let na = params.input_lwe_dimension.0 as f64;
let nb = params.output_lwe_dimension.0 as f64;
let ell = params.ks_decomposition_parameter.level as f64;
let number_of_ct = 1.;
let number_of_operations =
number_of_ct * algorithmic_complexity_ks(na, nb, ell, ciphertext_modulus_log as f64);
let size = std::mem::size_of::<u64>() as f64;
let ks_cost = w4 * number_of_operations / (self.ncores as f64 * occupancy)
+ w1 * (number_of_ct * ((na + 1.) + (nb + 1.)) + ell * (nb + 1.) * na) * size
+ w2 * number_of_ct * nb * size
+ w3 * (number_of_ct / number_of_ct.min(number_of_sm as f64 * 12.)).ceil()
* ((na + 1.) + (nb + 1.))
+ ell * (nb + 1.) * size;
ks_cost
todo!()
}
fn levelled_complexity(
@@ -136,6 +63,7 @@ impl ComplexityModel for GpuComplexity {
}
#[allow(non_snake_case)]
#[allow(dead_code)]
fn algorithmic_complexity_pbs(n: f64, k: f64, N: f64, ell: f64) -> f64 {
n * (ell * (k + 1.) * N * (N.log2() + 1.)
+ (k + 1.) * N * (N.log2() + 1.)
@@ -143,6 +71,7 @@ fn algorithmic_complexity_pbs(n: f64, k: f64, N: f64, ell: f64) -> f64 {
}
#[allow(non_snake_case)]
#[allow(dead_code)]
fn algorithmic_complexity_ks(na: f64, nb: f64, ell: f64, log2_q: f64) -> f64 {
na * nb * ell * log2_q
}
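
The operation-count helpers kept behind #[allow(dead_code)] are simple closed forms; as an illustrative check (a hypothetical call, e.g. from a unit test in this module, not part of this commit), the keyswitch count is just the product of its arguments:

// algorithmic_complexity_ks(na, nb, ell, log2_q) = na * nb * ell * log2_q
let ks_ops = algorithmic_complexity_ks(1024.0, 512.0, 4.0, 64.0);
assert_eq!(ks_ops, 134_217_728.0); // 1024 * 512 * 4 * 64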

View File

@@ -1,7 +1,7 @@
pub mod atomic_pattern;
mod atomic_pattern;
pub mod complexity;
pub mod complexity_model;
pub mod cpu;
pub mod fft;
mod fft;
pub mod gpu;
pub mod operators;

View File

@@ -3,6 +3,7 @@ use super::super::fft;
use crate::parameters::CmuxParameters;
use crate::utils::square;
#[derive(Clone)]
pub struct SimpleWithFactors {
fft: fft::AsymptoticWithFactors,
blind_rotate_factor: f64,

View File

@@ -1,6 +1,7 @@
use super::super::complexity::Complexity;
use crate::parameters::KeyswitchParameters;
#[derive(Clone)]
pub struct KsComplexity;
impl KsComplexity {

View File

@@ -1,3 +1,3 @@
pub mod cmux;
pub mod keyswitch_lwe;
pub mod pbs;
pub(super) mod keyswitch_lwe;
pub(super) mod pbs;

View File

@@ -2,7 +2,7 @@ use super::super::complexity::Complexity;
use super::cmux;
use crate::parameters::PbsParameters;
#[derive(Default)]
#[derive(Default, Clone)]
pub struct PbsComplexity {
pub cmux: cmux::SimpleWithFactors,
}

View File

@@ -0,0 +1,63 @@
use std::sync::Arc;
use crate::computing_cost::complexity_model::ComplexityModel;
use crate::computing_cost::cpu::CpuComplexity;
use crate::computing_cost::gpu::GpuComplexity;
use crate::optimization::config::{MAX_LOG2_BASE_CPU, MAX_LOG2_BASE_GPU};
#[derive(Clone, Copy)]
pub enum ProcessingUnit {
Cpu,
Gpu {
pbs_type: GpuPbsType,
number_of_sm: u64,
},
}
#[derive(Clone, Copy)]
pub enum GpuPbsType {
Lowlat,
Amortized,
}
impl ProcessingUnit {
pub fn max_br_base_log(self) -> u64 {
match self {
Self::Cpu => MAX_LOG2_BASE_CPU,
Self::Gpu { .. } => MAX_LOG2_BASE_GPU,
}
}
pub fn ks_to_string(self) -> &'static str {
match self {
Self::Cpu => "cpu",
Self::Gpu { .. } => "gpu",
}
}
pub fn br_to_string(self) -> &'static str {
match self {
Self::Cpu => "cpu",
Self::Gpu {
pbs_type: GpuPbsType::Lowlat,
..
} => "gpu_lowlat",
Self::Gpu {
pbs_type: GpuPbsType::Amortized,
..
} => "gpu_amortized",
}
}
pub fn complexity_model(self) -> Arc<dyn ComplexityModel> {
match self {
Self::Cpu => Arc::new(CpuComplexity::default()),
Self::Gpu {
pbs_type: GpuPbsType::Amortized,
number_of_sm,
} => Arc::new(GpuComplexity::default_amortized_u64(number_of_sm)),
Self::Gpu {
pbs_type: GpuPbsType::Lowlat,
number_of_sm,
} => Arc::new(GpuComplexity::default_lowlat_u64(number_of_sm)),
}
}
}
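
A short sketch of how the new ProcessingUnit is meant to be queried, written in the scope of this file (constants and constructors as defined here; the SM count is illustrative):

let cpu = ProcessingUnit::Cpu;
let gpu = ProcessingUnit::Gpu {
    pbs_type: GpuPbsType::Amortized,
    number_of_sm: 80, // illustrative value
};
assert_eq!(cpu.max_br_base_log(), 64); // MAX_LOG2_BASE_CPU
assert_eq!(gpu.max_br_base_log(), 16); // MAX_LOG2_BASE_GPU
assert_eq!(gpu.br_to_string(), "gpu_amortized");
assert_eq!(gpu.ks_to_string(), "gpu");
let _model: Arc<dyn ComplexityModel> = gpu.complexity_model(); // GpuComplexity::default_amortized_u64(80)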

View File

@@ -19,6 +19,7 @@
pub mod computing_cost;
pub mod config;
pub mod dag;
pub mod global_parameters;
pub mod noise_estimator;

View File

@@ -1,4 +1,6 @@
use crate::computing_cost::complexity_model::ComplexityModel;
use crate::config;
use crate::config::GpuPbsType;
use crate::global_parameters::DEFAUT_DOMAINS;
#[derive(Clone, Copy, Debug)]
@@ -23,8 +25,8 @@ pub struct SearchSpace {
pub internal_lwe_dimensions: Vec<u64>,
}
impl Default for SearchSpace {
fn default() -> Self {
impl SearchSpace {
pub fn default_cpu() -> Self {
let glwe_log_polynomial_sizes: Vec<u64> = DEFAUT_DOMAINS
.glwe_pbs_constrained
.log2_polynomial_size
@@ -38,4 +40,55 @@ impl Default for SearchSpace {
internal_lwe_dimensions,
}
}
pub fn default_gpu_lowlat() -> Self {
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L156
let glwe_log_polynomial_sizes: Vec<u64> = (9..=11).collect();
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L154
let glwe_dimensions: Vec<u64> = vec![1];
let internal_lwe_dimensions: Vec<u64> = DEFAUT_DOMAINS.free_glwe.glwe_dimension.as_vec();
Self {
glwe_log_polynomial_sizes,
glwe_dimensions,
internal_lwe_dimensions,
}
}
pub fn default_gpu_amortized() -> Self {
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L79
let glwe_log_polynomial_sizes: Vec<u64> = (9..=13).collect();
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L78
let glwe_dimensions: Vec<u64> = vec![1];
let internal_lwe_dimensions: Vec<u64> = DEFAUT_DOMAINS.free_glwe.glwe_dimension.as_vec();
Self {
glwe_log_polynomial_sizes,
glwe_dimensions,
internal_lwe_dimensions,
}
}
pub fn default(processing_unit: config::ProcessingUnit) -> Self {
match processing_unit {
config::ProcessingUnit::Cpu => Self::default_cpu(),
config::ProcessingUnit::Gpu {
pbs_type: GpuPbsType::Amortized,
..
} => Self::default_gpu_amortized(),
config::ProcessingUnit::Gpu {
pbs_type: GpuPbsType::Lowlat,
..
} => Self::default_gpu_lowlat(),
}
}
}
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L77
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L153
pub const MAX_LOG2_BASE_GPU: u64 = 16;
pub const MAX_LOG2_BASE_CPU: u64 = 64;
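
For reference, the constrained GPU search spaces can be exercised directly; a sketch in the scope of this file, assuming the constructors above (the SM count is illustrative):

let lowlat = SearchSpace::default_gpu_lowlat();
assert_eq!(lowlat.glwe_log_polynomial_sizes, vec![9, 10, 11]); // concrete-cuda low-latency bound
assert_eq!(lowlat.glwe_dimensions, vec![1]);

let amortized = SearchSpace::default(config::ProcessingUnit::Gpu {
    pbs_type: GpuPbsType::Amortized,
    number_of_sm: 80, // illustrative value
});
assert_eq!(amortized.glwe_log_polynomial_sizes, vec![9, 10, 11, 12, 13]);
assert_eq!(amortized.glwe_dimensions, vec![1]);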

View File

@@ -359,12 +359,12 @@ mod tests {
use super::*;
use crate::computing_cost::cpu::CpuComplexity;
use crate::config;
use crate::dag::operator::{FunctionTable, Shape, Weights};
use crate::noise_estimator::p_error::repeat_p_error;
use crate::optimization::atomic_pattern;
use crate::optimization::config::SearchSpace;
use crate::optimization::dag::solo_key::symbolic_variance::VarianceOrigin;
use crate::optimization::decomposition;
use crate::optimization::{atomic_pattern, decomposition};
use crate::utils::square;
fn small_relative_diff(v1: f64, v2: f64) -> bool {
@@ -399,7 +399,7 @@ mod tests {
complexity_model: &CpuComplexity::default(),
};
let search_space = SearchSpace::default();
let search_space = SearchSpace::default_cpu();
super::optimize(dag, config, &search_space, cache)
}
@@ -429,7 +429,9 @@ mod tests {
}
fn v0_parameter_ref(precision: u64, weight: u64, times: &mut Times) {
let search_space = SearchSpace::default();
let processing_unit = config::ProcessingUnit::Cpu;
let search_space = SearchSpace::default(processing_unit);
let sum_size = 1;
@@ -440,7 +442,7 @@ mod tests {
complexity_model: &CpuComplexity::default(),
};
let cache = decomposition::cache(config.security_level);
let cache = decomposition::cache(config.security_level, processing_unit, None);
let _ = optimize_v0(
sum_size,
@@ -499,9 +501,10 @@ mod tests {
}
fn v0_parameter_ref_with_dot(precision: Precision, weight: i64) {
let processing_unit = config::ProcessingUnit::Cpu;
let security_level = 128;
let cache = decomposition::cache(security_level);
let cache = decomposition::cache(security_level, processing_unit, None);
let mut dag = unparametrized::OperationDag::new();
{
@@ -530,7 +533,7 @@ mod tests {
assert_f64_eq(square(weight) as f64, constraint.pareto_in_lut[0].lut_coeff);
}
let search_space = SearchSpace::default();
let search_space = SearchSpace::default(processing_unit);
let config = Config {
security_level,
@@ -589,7 +592,8 @@ mod tests {
}
#[test]
fn test_lut_vs_no_lut() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
for precision in 1..=8 {
no_lut_vs_lut(precision, &cache);
}
@@ -632,7 +636,8 @@ mod tests {
#[test]
fn test_lut_with_input_base_noise_better_than_lut_with_lut_base_noise() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
for log_weight in 1..=16 {
let weight = 1 << log_weight;
for precision in 5..=9 {
@@ -666,7 +671,8 @@ mod tests {
#[test]
fn test_lut_1_layer_is_better() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
// for some reason on 4, 5, 6, the complexity is already minimal
// this could be due to pre-defined pareto set
for precision in [1, 2, 3, 7, 8] {
@@ -722,7 +728,8 @@ mod tests {
#[test]
fn test_multi_precision_dominate_single() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
let mut prev = Some(true); // true -> ... -> true -> false -> ... -> false
for log2_weight in 0..29 {
let weight = 1 << log2_weight;
@@ -756,7 +763,8 @@ mod tests {
#[test]
fn test_global_p_error_input() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
for precision in [4_u8, 8] {
for weight in [1, 3, 27, 243, 729] {
for dim in [1, 2, 16, 32] {
@@ -786,7 +794,8 @@ mod tests {
#[test]
fn test_global_p_error_lut() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
for precision in [4_u8, 8] {
for weight in [1, 3, 27, 243, 729] {
for depth in [2, 16, 32] {
@@ -847,7 +856,8 @@ mod tests {
#[allow(clippy::unnecessary_cast)] // clippy bug refusing as Precision on const
#[test]
fn test_global_p_error_dominating_lut() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
let depth = 128;
let weights_low = 1;
let weights_high = 1;
@@ -875,7 +885,8 @@ mod tests {
#[allow(clippy::unnecessary_cast)] // clippy bug refusing as Precision on const
#[test]
fn test_global_p_error_non_dominating_lut() {
let cache = decomposition::cache(128);
let processing_unit = config::ProcessingUnit::Cpu;
let cache = decomposition::cache(128, processing_unit, None);
let depth = 128;
let weights_low = 1024 * 1024 * 3;
let weights_high = 1;

View File

@@ -1,13 +1,15 @@
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use concrete_commons::dispersion::DispersionParameter;
use crate::computing_cost::operators::pbs::PbsComplexity;
use crate::computing_cost::complexity_model::ComplexityModel;
use crate::noise_estimator::operators::atomic_pattern as noise_atomic_pattern;
use crate::parameters::{BrDecompositionParameters, GlweParameters, LweDimension, PbsParameters};
use crate::security;
use crate::utils::cache::ephemeral::{CacheHashMap, EphemeralCache};
use crate::utils::cache::persistent::PersistentCacheHashMap;
use crate::{config, security};
use super::common::MacroParam;
use super::cut::ComplexityNoise;
@@ -21,10 +23,12 @@ pub struct BrComplexityNoise {
/* This is strictly variance decreasing and strictly complexity increasing */
pub fn pareto_quantities(
complexity_model: &dyn ComplexityModel,
ciphertext_modulus_log: u32,
security_level: u64,
internal_dim: u64,
glwe_params: GlweParameters,
max_log2_base: u64,
) -> Vec<BrComplexityNoise> {
assert!(ciphertext_modulus_log == 64);
let pbs_param = |level, log2_base| {
@@ -38,24 +42,22 @@ pub fn pareto_quantities(
let variance_bsk =
security::glwe::minimal_variance(glwe_params, ciphertext_modulus_log, security_level);
let mut quantities = Vec::with_capacity(64);
let mut quantities = Vec::with_capacity(max_log2_base as usize);
let mut increasing_complexity = 0.0;
let mut decreasing_variance = f64::INFINITY;
let mut counting_no_progress = 0;
let mut prev_best_log2_base = 0_u64;
let max_level = ciphertext_modulus_log as u64;
for level in 1..=max_level {
let mut prev_best_log2_base = max_log2_base;
for level in 1..=ciphertext_modulus_log as u64 {
// detect increasing noise
let mut level_decreasing_base_noise = f64::INFINITY;
let mut best_log2_base = 0_u64;
let range: Vec<_> = if level == 1 {
(1..=(max_level / level)).collect()
} else {
// we know a max is between 1 and prev_best_log2_base
// and the curve has only 1 maximum close to prev_best_log2_base
// so we start on prev_best_log2_base
(1..=prev_best_log2_base).rev().collect()
};
// we know a max is between 1 and prev_best_log2_base
// and the curve has only 1 maximum close to prev_best_log2_base
// so we start on prev_best_log2_base
let range = (1..=prev_best_log2_base).rev();
for log2_base in range {
let base_noise = noise_atomic_pattern::variance_bootstrap(
pbs_param(level, log2_base),
@@ -81,7 +83,7 @@ pub fn pareto_quantities(
continue;
}
let params = pbs_param(level, best_log2_base);
let complexity_pbs = PbsComplexity::default().complexity(params, ciphertext_modulus_log);
let complexity_pbs = complexity_model.pbs_complexity(params, ciphertext_modulus_log);
quantities.push(BrComplexityNoise {
decomp: params.br_decomposition_parameter,
@@ -118,19 +120,33 @@ impl Cache {
pub type PersistDecompCache = PersistentCacheHashMap<MacroParam, Vec<BrComplexityNoise>>;
pub fn cache(security_level: u64) -> PersistDecompCache {
pub fn cache(
security_level: u64,
processing_unit: config::ProcessingUnit,
complexity_model: Option<Arc<dyn ComplexityModel>>,
) -> PersistDecompCache {
let max_log2_base = processing_unit.max_br_base_log();
let ciphertext_modulus_log = 64;
let tmp: String = std::env::temp_dir()
.to_str()
.expect("Invalid tmp dir")
.into();
let path = format!("{tmp}/optimizer/cache/br-decomp-cpu-64-{security_level}");
let hardware = processing_unit.br_to_string();
let path = format!("{tmp}/optimizer/cache/br-decomp-{hardware}-64-{security_level}");
let complexity_model = complexity_model.unwrap_or_else(|| processing_unit.complexity_model());
let function = move |(glwe_params, internal_dim): MacroParam| {
pareto_quantities(
complexity_model.as_ref(),
ciphertext_modulus_log,
security_level,
internal_dim,
glwe_params,
max_log2_base,
)
};
PersistentCacheHashMap::new(&path, "v0", function)
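
Because the cache path now encodes the hardware string, CPU and GPU pareto fronts are stored separately; a sketch of the resulting layout for security level 128 (temp dir shown as /tmp, SM count illustrative):

// Path layout (hardware comes from ProcessingUnit::br_to_string()):
//   Cpu                     -> /tmp/optimizer/cache/br-decomp-cpu-64-128
//   Gpu { Lowlat, .. }      -> /tmp/optimizer/cache/br-decomp-gpu_lowlat-64-128
//   Gpu { Amortized, .. }   -> /tmp/optimizer/cache/br-decomp-gpu_amortized-64-128
let pu = config::ProcessingUnit::Gpu { pbs_type: config::GpuPbsType::Lowlat, number_of_sm: 80 };
let _br_cache = cache(128, pu, None); // as called from within this module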

View File

@@ -1,8 +1,11 @@
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use concrete_commons::dispersion::DispersionParameter;
use crate::computing_cost::operators::keyswitch_lwe::KsComplexity;
use crate::computing_cost::complexity_model::ComplexityModel;
use crate::config;
use crate::noise_estimator::operators::atomic_pattern as noise_atomic_pattern;
use crate::parameters::{
GlweParameters, KeyswitchParameters, KsDecompositionParameters, LweDimension,
@@ -31,6 +34,7 @@ impl ComplexityNoise for KsComplexityNoise {
/* This is strictly variance decreasing and strictly complexity increasing */
pub fn pareto_quantities(
complexity_model: &dyn ComplexityModel,
ciphertext_modulus_log: u32,
security_level: u64,
internal_dim: u64,
@@ -54,20 +58,18 @@ pub fn pareto_quantities(
let mut increasing_complexity = 0.0;
let mut decreasing_variance = f64::INFINITY;
let mut counting_no_progress = 0;
let mut prev_best_log2_base = 0_u64;
let max_level = ciphertext_modulus_log as u64;
for level in 1..=max_level {
let mut prev_best_log2_base = ciphertext_modulus_log as u64;
for level in 1..=ciphertext_modulus_log as u64 {
// detect increasing noise
let mut level_decreasing_base_noise = f64::INFINITY;
let mut best_log2_base = 0_u64;
let range: Vec<_> = if level == 1 {
(1..=(max_level / level)).collect()
} else {
// we know a max is between 1 and prev_best_log2_base
// and the curve has only 1 maximum close to prev_best_log2_base
// so we start on prev_best_log2_base
(1..=prev_best_log2_base).rev().collect()
};
// we know a max is between 1 and prev_best_log2_base
// and the curve has only 1 maximum close to prev_best_log2_base
// so we start on prev_best_log2_base
let range = (1..=prev_best_log2_base).rev();
for log2_base in range {
let noise_keyswitch = noise_atomic_pattern::variance_keyswitch(
ks_param(level, log2_base),
@@ -93,7 +95,8 @@ pub fn pareto_quantities(
continue;
}
let ks_params = ks_param(level, best_log2_base);
let complexity_keyswitch = KsComplexity.complexity(ks_params, ciphertext_modulus_log);
let complexity_keyswitch =
complexity_model.ks_complexity(ks_params, ciphertext_modulus_log);
quantities.push(KsComplexityNoise {
decomp: ks_params.ks_decomposition_parameter,
noise: level_decreasing_base_noise,
@@ -120,15 +123,26 @@ impl Cache {
pub type PersistDecompCache = PersistentCacheHashMap<MacroParam, Vec<KsComplexityNoise>>;
pub fn cache(security_level: u64) -> PersistDecompCache {
pub fn cache(
security_level: u64,
processing_unit: config::ProcessingUnit,
complexity_model: Option<Arc<dyn ComplexityModel>>,
) -> PersistDecompCache {
let ciphertext_modulus_log = 64;
let tmp: String = std::env::temp_dir()
.to_str()
.expect("Invalid tmp dir")
.into();
let path = format!("{tmp}/optimizer/cache/ks-decomp-cpu-64-{security_level}");
let hardware = processing_unit.ks_to_string();
let path = format!("{tmp}/optimizer/cache/ks-decomp-{hardware}-64-{security_level}");
let complexity_model = complexity_model.unwrap_or_else(|| processing_unit.complexity_model());
let function = move |(glwe_params, internal_dim): MacroParam| {
pareto_quantities(
complexity_model.as_ref(),
ciphertext_modulus_log,
security_level,
internal_dim,

View File

@@ -3,17 +3,30 @@ pub mod common;
pub mod cut;
pub mod keyswitch;
use std::sync::Arc;
pub use common::MacroParam;
pub use cut::cut_complexity_noise;
use crate::computing_cost::complexity_model::ComplexityModel;
use crate::config;
pub struct PersistDecompCache {
pub ks: keyswitch::PersistDecompCache,
pub br: blind_rotate::PersistDecompCache,
}
pub fn cache(security_level: u64) -> PersistDecompCache {
pub fn cache(
security_level: u64,
processing_unit: config::ProcessingUnit,
complexity_model: Option<Arc<dyn ComplexityModel>>,
) -> PersistDecompCache {
PersistDecompCache {
ks: keyswitch::cache(security_level),
br: blind_rotate::cache(security_level),
ks: keyswitch::cache(security_level, processing_unit, complexity_model.clone()),
br: blind_rotate::cache(security_level, processing_unit, complexity_model),
}
}
trait ComplexityModelClone: ComplexityModel + Clone {}
impl<T: ComplexityModel + Clone> ComplexityModelClone for T {}

View File

@@ -10,6 +10,7 @@
use clap::Parser;
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
use concrete_optimizer::config;
use concrete_optimizer::global_parameters::DEFAUT_DOMAINS;
use concrete_optimizer::optimization::atomic_pattern::{
self as optimize_atomic_pattern, OptimizationState,
@@ -84,6 +85,8 @@ pub struct Args {
}
pub fn all_results(args: &Args) -> Vec<Vec<OptimizationState>> {
let processing_unit = config::ProcessingUnit::Cpu;
let sum_size = args.sum_size;
let maximum_acceptable_error_probability = args.p_error;
let security_level = args.security_level;
@@ -108,7 +111,7 @@ pub fn all_results(args: &Args) -> Vec<Vec<OptimizationState>> {
complexity_model: &CpuComplexity::default(),
};
let cache = decomposition::cache(config.security_level);
let cache = decomposition::cache(config.security_level, processing_unit, None);
precisions_iter
.map(|precision| {