diff --git a/charts/src/bin/norm2_complexity.rs b/charts/src/bin/norm2_complexity.rs index cd87141ac..c20fcc6fd 100644 --- a/charts/src/bin/norm2_complexity.rs +++ b/charts/src/bin/norm2_complexity.rs @@ -1,5 +1,6 @@ use charts::{draw, Serie}; use concrete_optimizer::computing_cost::cpu::CpuComplexity; +use concrete_optimizer::config; use concrete_optimizer::global_parameters::DEFAUT_DOMAINS; use concrete_optimizer::optimization::atomic_pattern::{self as optimize_atomic_pattern}; use concrete_optimizer::optimization::config::{Config, SearchSpace}; @@ -25,6 +26,8 @@ fn main() -> Result<(), Box> { let glwe_dimensions: Vec<_> = (1..=6).collect(); let internal_lwe_dimensions: Vec<_> = (MIN_LWE_DIM..=MAX_LWE_DIM).step_by(10).collect(); + let processing_unit = config::ProcessingUnit::Cpu; + let search_space = SearchSpace { glwe_log_polynomial_sizes, glwe_dimensions, @@ -41,7 +44,7 @@ fn main() -> Result<(), Box> { complexity_model: &CpuComplexity::default(), }; - let cache = decomposition::cache(security_level); + let cache = decomposition::cache(security_level, processing_unit, None); let solutions: Vec<_> = log_norm2s .clone() diff --git a/charts/src/bin/precision_complexity.rs b/charts/src/bin/precision_complexity.rs index b53e72190..09fd64695 100644 --- a/charts/src/bin/precision_complexity.rs +++ b/charts/src/bin/precision_complexity.rs @@ -1,5 +1,6 @@ use charts::{draw, Serie}; use concrete_optimizer::computing_cost::cpu::CpuComplexity; +use concrete_optimizer::config; use concrete_optimizer::global_parameters::DEFAUT_DOMAINS; use concrete_optimizer::optimization::atomic_pattern::{self as optimize_atomic_pattern}; use concrete_optimizer::optimization::config::{Config, SearchSpace}; @@ -18,6 +19,8 @@ pub const MIN_LWE_DIM: u64 = DEFAUT_DOMAINS.free_glwe.glwe_dimension.start as u6 pub const MAX_LWE_DIM: u64 = DEFAUT_DOMAINS.free_glwe.glwe_dimension.end as u64 - 1; fn main() -> Result<(), Box> { + let processing_unit = config::ProcessingUnit::Cpu; + let sum_size = 4096; let p_error = _4_SIGMA; let security_level = 128; @@ -41,7 +44,7 @@ fn main() -> Result<(), Box> { complexity_model: &CpuComplexity::default(), }; - let cache = decomposition::cache(security_level); + let cache = decomposition::cache(security_level, processing_unit, None); let solutions: Vec<_> = precisions .clone() diff --git a/concrete-optimizer-cpp/src/concrete-optimizer.rs b/concrete-optimizer-cpp/src/concrete-optimizer.rs index e89bd8714..79faeaaf6 100644 --- a/concrete-optimizer-cpp/src/concrete-optimizer.rs +++ b/concrete-optimizer-cpp/src/concrete-optimizer.rs @@ -1,4 +1,5 @@ use concrete_optimizer::computing_cost::cpu::CpuComplexity; +use concrete_optimizer::config; use concrete_optimizer::dag::operator::{ self, FunctionTable, LevelledComplexity, OperatorIndex, Precision, Shape, }; @@ -22,6 +23,8 @@ fn no_dag_solution() -> ffi::DagSolution { } fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options) -> ffi::Solution { + let processing_unit = config::ProcessingUnit::Cpu; + let config = Config { security_level: options.security_level, maximum_acceptable_error_probability: options.maximum_acceptable_error_probability, @@ -31,7 +34,7 @@ fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options) let sum_size = 1; - let search_space = SearchSpace::default(); + let search_space = SearchSpace::default(processing_unit); let result = concrete_optimizer::optimization::atomic_pattern::optimize_one( sum_size, @@ -39,7 +42,7 @@ fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options) config, noise_factor, &search_space, - &decomposition::cache(options.security_level), + &decomposition::cache(options.security_level, processing_unit, None), ); result .best_solution @@ -199,6 +202,7 @@ impl OperationDag { } fn optimize_v0(&self, options: ffi::Options) -> ffi::Solution { + let processing_unit = config::ProcessingUnit::Cpu; let config = Config { security_level: options.security_level, maximum_acceptable_error_probability: options.maximum_acceptable_error_probability, @@ -206,13 +210,13 @@ impl OperationDag { complexity_model: &CpuComplexity::default(), }; - let search_space = SearchSpace::default(); + let search_space = SearchSpace::default(processing_unit); let result = concrete_optimizer::optimization::dag::solo_key::optimize::optimize( &self.0, config, &search_space, - &decomposition::cache(options.security_level), + &decomposition::cache(options.security_level, processing_unit, None), ); result .best_solution @@ -220,6 +224,7 @@ impl OperationDag { } fn optimize(&self, options: ffi::Options) -> ffi::DagSolution { + let processing_unit = config::ProcessingUnit::Cpu; let config = Config { security_level: options.security_level, maximum_acceptable_error_probability: options.maximum_acceptable_error_probability, @@ -227,8 +232,8 @@ impl OperationDag { complexity_model: &CpuComplexity::default(), }; - let search_space = SearchSpace::default(); - let cache = decomposition::cache(options.security_level); + let search_space = SearchSpace::default(processing_unit); + let cache = decomposition::cache(options.security_level, processing_unit, None); let result = concrete_optimizer::optimization::dag::solo_key::optimize_generic::optimize( &self.0, diff --git a/concrete-optimizer/src/computing_cost/atomic_pattern.rs b/concrete-optimizer/src/computing_cost/atomic_pattern.rs index 36eb238d8..a89d01345 100644 --- a/concrete-optimizer/src/computing_cost/atomic_pattern.rs +++ b/concrete-optimizer/src/computing_cost/atomic_pattern.rs @@ -2,6 +2,7 @@ use super::complexity::Complexity; use super::complexity_model::ComplexityModel; use crate::parameters::AtomicPatternParameters; +#[allow(dead_code)] pub fn atomic_pattern_complexity( complexity_model: &dyn ComplexityModel, sum_size: u64, diff --git a/concrete-optimizer/src/computing_cost/cpu.rs b/concrete-optimizer/src/computing_cost/cpu.rs index 71385aba8..cf255395e 100644 --- a/concrete-optimizer/src/computing_cost/cpu.rs +++ b/concrete-optimizer/src/computing_cost/cpu.rs @@ -4,6 +4,7 @@ use super::operators::keyswitch_lwe::KsComplexity; use super::operators::{keyswitch_lwe, pbs}; use crate::parameters::{KeyswitchParameters, LweDimension, PbsParameters}; +#[derive(Clone)] pub struct CpuComplexity { pub ks_lwe: keyswitch_lwe::KsComplexity, pub pbs: pbs::PbsComplexity, diff --git a/concrete-optimizer/src/computing_cost/fft.rs b/concrete-optimizer/src/computing_cost/fft.rs index 7cd3ff846..3a81694c3 100644 --- a/concrete-optimizer/src/computing_cost/fft.rs +++ b/concrete-optimizer/src/computing_cost/fft.rs @@ -1,6 +1,7 @@ use super::complexity::Complexity; /** Standard fft complexity model */ +#[derive(Clone)] pub struct AsymptoticWithFactors { factor_fft: f64, // factor applied on asymptotic complexity factor_ifft: f64, // factor applied on asymptotic complexity @@ -32,14 +33,6 @@ impl Default for AsymptoticWithFactors { #[cfg(test)] pub mod tests { use crate::computing_cost::fft; - use crate::computing_cost::fft::AsymptoticWithFactors; - - /** Standard fft complexity with X factors*/ - pub const COST_AWS: AsymptoticWithFactors = AsymptoticWithFactors { - // https://github.com/zama-ai/concrete-optimizer/blob/prototype/python/optimizer/noise_formulas/bootstrap.py#L150 - factor_fft: 0.202_926_951_153_089_17, - factor_ifft: 0.407_795_078_512_891, - }; #[test] fn golden_python_prototype() { diff --git a/concrete-optimizer/src/computing_cost/gpu.rs b/concrete-optimizer/src/computing_cost/gpu.rs index fcd3fedd9..e1581d32a 100644 --- a/concrete-optimizer/src/computing_cost/gpu.rs +++ b/concrete-optimizer/src/computing_cost/gpu.rs @@ -4,125 +4,52 @@ use crate::parameters::{KeyswitchParameters, LweDimension, PbsParameters}; use crate::utils::square; #[derive(Clone, Copy)] -pub struct GpuPbsComplexity { - pub w1: f64, - pub w2: f64, - pub w3: f64, - pub w4: f64, - pub occupancy: f64, -} - -//https://github.com/zama-ai/concrete-core-internal/issues/91 -impl GpuPbsComplexity { - pub fn default_lowlat_u64(occupancy: f64) -> Self { - Self { - w1: 2_576.105_013_4, - w2: -21_631.382_229_52, - w3: -86_525.527_535_17, - w4: 0.125_472_398_538_904_43, - occupancy, - } - } +pub enum GpuPbsComplexity { + Lowlat, + Amortized, } #[derive(Clone, Copy)] -pub struct GpuKsComplexity { - pub w1: f64, - pub w2: f64, - pub w3: f64, - pub w4: f64, - pub occupancy: f64, - pub number_of_sm: u64, -} - -// https://github.com/zama-ai/concrete-core-internal/issues/90 -impl GpuKsComplexity { - pub fn default_u64(occupancy: f64, number_of_sm: u64) -> Self { - Self { - w1: 7_959.869_676_54, - w2: 3_866.817_732_87, - w3: 8_353.484_127_44, - w4: 0.125_472_398_538_904_43, - occupancy, - number_of_sm, - } - } -} +pub struct GpuKsComplexity; #[derive(Clone, Copy)] pub struct GpuComplexity { pub ks: GpuKsComplexity, pub pbs: GpuPbsComplexity, - pub ncores: u64, + pub number_of_sm: u64, +} + +impl GpuComplexity { + pub fn default_lowlat_u64(number_of_sm: u64) -> Self { + Self { + ks: GpuKsComplexity, + pbs: GpuPbsComplexity::Lowlat, + number_of_sm, + } + } + + pub fn default_amortized_u64(number_of_sm: u64) -> Self { + Self { + ks: GpuKsComplexity, + pbs: GpuPbsComplexity::Amortized, + number_of_sm, + } + } } impl ComplexityModel for GpuComplexity { #[allow(clippy::let_and_return, non_snake_case)] - fn pbs_complexity(&self, params: PbsParameters, _ciphertext_modulus_log: u32) -> Complexity { - let GpuPbsComplexity { - w1, - w2, - w3, - w4, - occupancy, - } = self.pbs; - - let n = params.internal_lwe_dimension.0 as f64; - let k = params.output_glwe_params.glwe_dimension as f64; - let N = (1 << params.output_glwe_params.log2_polynomial_size) as f64; - - let ell = params.br_decomposition_parameter.level as f64; - - let number_of_ct = 1.; - - let number_of_operations = number_of_ct * algorithmic_complexity_pbs(n, k, N, ell); - - let size = std::mem::size_of::() as f64; - - let pbs_cost = w4 * number_of_operations / (self.ncores as f64 * occupancy) - + (w1 * n * (2. + ell * N * square(k + 1.)) - + 2. * N * ell * (w2 + w3 * square(k + 1.))) - * size; - - pbs_cost + fn pbs_complexity(&self, _params: PbsParameters, _ciphertext_modulus_log: u32) -> Complexity { + todo!() } #[allow(clippy::let_and_return)] fn ks_complexity( &self, - params: KeyswitchParameters, - ciphertext_modulus_log: u32, + _params: KeyswitchParameters, + _ciphertext_modulus_log: u32, ) -> Complexity { - let GpuKsComplexity { - w1, - w2, - w3, - w4, - occupancy, - number_of_sm, - } = self.ks; - - let na = params.input_lwe_dimension.0 as f64; - - let nb = params.output_lwe_dimension.0 as f64; - - let ell = params.ks_decomposition_parameter.level as f64; - - let number_of_ct = 1.; - - let number_of_operations = - number_of_ct * algorithmic_complexity_ks(na, nb, ell, ciphertext_modulus_log as f64); - - let size = std::mem::size_of::() as f64; - - let ks_cost = w4 * number_of_operations / (self.ncores as f64 * occupancy) - + w1 * (number_of_ct * ((na + 1.) + (nb + 1.)) + ell * (nb + 1.) * na) * size - + w2 * number_of_ct * nb * size - + w3 * (number_of_ct / number_of_ct.min(number_of_sm as f64 * 12.)).ceil() - * ((na + 1.) + (nb + 1.)) - + ell * (nb + 1.) * size; - - ks_cost + todo!() } fn levelled_complexity( @@ -136,6 +63,7 @@ impl ComplexityModel for GpuComplexity { } #[allow(non_snake_case)] +#[allow(dead_code)] fn algorithmic_complexity_pbs(n: f64, k: f64, N: f64, ell: f64) -> f64 { n * (ell * (k + 1.) * N * (N.log2() + 1.) + (k + 1.) * N * (N.log2() + 1.) @@ -143,6 +71,7 @@ fn algorithmic_complexity_pbs(n: f64, k: f64, N: f64, ell: f64) -> f64 { } #[allow(non_snake_case)] +#[allow(dead_code)] fn algorithmic_complexity_ks(na: f64, nb: f64, ell: f64, log2_q: f64) -> f64 { na * nb * ell * log2_q } diff --git a/concrete-optimizer/src/computing_cost/mod.rs b/concrete-optimizer/src/computing_cost/mod.rs index 9b2ad7aa2..019fc6590 100644 --- a/concrete-optimizer/src/computing_cost/mod.rs +++ b/concrete-optimizer/src/computing_cost/mod.rs @@ -1,7 +1,7 @@ -pub mod atomic_pattern; +mod atomic_pattern; pub mod complexity; pub mod complexity_model; pub mod cpu; -pub mod fft; +mod fft; pub mod gpu; pub mod operators; diff --git a/concrete-optimizer/src/computing_cost/operators/cmux.rs b/concrete-optimizer/src/computing_cost/operators/cmux.rs index 8a594bea3..88fadf153 100644 --- a/concrete-optimizer/src/computing_cost/operators/cmux.rs +++ b/concrete-optimizer/src/computing_cost/operators/cmux.rs @@ -3,6 +3,7 @@ use super::super::fft; use crate::parameters::CmuxParameters; use crate::utils::square; +#[derive(Clone)] pub struct SimpleWithFactors { fft: fft::AsymptoticWithFactors, blind_rotate_factor: f64, diff --git a/concrete-optimizer/src/computing_cost/operators/keyswitch_lwe.rs b/concrete-optimizer/src/computing_cost/operators/keyswitch_lwe.rs index 312970499..31b33fc6f 100644 --- a/concrete-optimizer/src/computing_cost/operators/keyswitch_lwe.rs +++ b/concrete-optimizer/src/computing_cost/operators/keyswitch_lwe.rs @@ -1,6 +1,7 @@ use super::super::complexity::Complexity; use crate::parameters::KeyswitchParameters; +#[derive(Clone)] pub struct KsComplexity; impl KsComplexity { diff --git a/concrete-optimizer/src/computing_cost/operators/mod.rs b/concrete-optimizer/src/computing_cost/operators/mod.rs index 811ed0bc4..41228f789 100644 --- a/concrete-optimizer/src/computing_cost/operators/mod.rs +++ b/concrete-optimizer/src/computing_cost/operators/mod.rs @@ -1,3 +1,3 @@ pub mod cmux; -pub mod keyswitch_lwe; -pub mod pbs; +pub(super) mod keyswitch_lwe; +pub(super) mod pbs; diff --git a/concrete-optimizer/src/computing_cost/operators/pbs.rs b/concrete-optimizer/src/computing_cost/operators/pbs.rs index 6e6b90e0c..ccf4bf60b 100644 --- a/concrete-optimizer/src/computing_cost/operators/pbs.rs +++ b/concrete-optimizer/src/computing_cost/operators/pbs.rs @@ -2,7 +2,7 @@ use super::super::complexity::Complexity; use super::cmux; use crate::parameters::PbsParameters; -#[derive(Default)] +#[derive(Default, Clone)] pub struct PbsComplexity { pub cmux: cmux::SimpleWithFactors, } diff --git a/concrete-optimizer/src/config.rs b/concrete-optimizer/src/config.rs new file mode 100644 index 000000000..0889494c2 --- /dev/null +++ b/concrete-optimizer/src/config.rs @@ -0,0 +1,63 @@ +use std::sync::Arc; + +use crate::computing_cost::complexity_model::ComplexityModel; +use crate::computing_cost::cpu::CpuComplexity; +use crate::computing_cost::gpu::GpuComplexity; +use crate::optimization::config::{MAX_LOG2_BASE_CPU, MAX_LOG2_BASE_GPU}; + +#[derive(Clone, Copy)] +pub enum ProcessingUnit { + Cpu, + Gpu { + pbs_type: GpuPbsType, + number_of_sm: u64, + }, +} + +#[derive(Clone, Copy)] +pub enum GpuPbsType { + Lowlat, + Amortized, +} + +impl ProcessingUnit { + pub fn max_br_base_log(self) -> u64 { + match self { + Self::Cpu => MAX_LOG2_BASE_CPU, + Self::Gpu { .. } => MAX_LOG2_BASE_GPU, + } + } + + pub fn ks_to_string(self) -> &'static str { + match self { + Self::Cpu => "cpu", + Self::Gpu { .. } => "gpu", + } + } + pub fn br_to_string(self) -> &'static str { + match self { + Self::Cpu => "cpu", + Self::Gpu { + pbs_type: GpuPbsType::Lowlat, + .. + } => "gpu_lowlat", + Self::Gpu { + pbs_type: GpuPbsType::Amortized, + .. + } => "gpu_amortized", + } + } + pub fn complexity_model(self) -> Arc { + match self { + Self::Cpu => Arc::new(CpuComplexity::default()), + Self::Gpu { + pbs_type: GpuPbsType::Amortized, + number_of_sm, + } => Arc::new(GpuComplexity::default_amortized_u64(number_of_sm)), + Self::Gpu { + pbs_type: GpuPbsType::Lowlat, + number_of_sm, + } => Arc::new(GpuComplexity::default_lowlat_u64(number_of_sm)), + } + } +} diff --git a/concrete-optimizer/src/lib.rs b/concrete-optimizer/src/lib.rs index 685f9bc93..204004ab2 100644 --- a/concrete-optimizer/src/lib.rs +++ b/concrete-optimizer/src/lib.rs @@ -19,6 +19,7 @@ pub mod computing_cost; +pub mod config; pub mod dag; pub mod global_parameters; pub mod noise_estimator; diff --git a/concrete-optimizer/src/optimization/config.rs b/concrete-optimizer/src/optimization/config.rs index d5737a5d6..97cdab57b 100644 --- a/concrete-optimizer/src/optimization/config.rs +++ b/concrete-optimizer/src/optimization/config.rs @@ -1,4 +1,6 @@ use crate::computing_cost::complexity_model::ComplexityModel; +use crate::config; +use crate::config::GpuPbsType; use crate::global_parameters::DEFAUT_DOMAINS; #[derive(Clone, Copy, Debug)] @@ -23,8 +25,8 @@ pub struct SearchSpace { pub internal_lwe_dimensions: Vec, } -impl Default for SearchSpace { - fn default() -> Self { +impl SearchSpace { + pub fn default_cpu() -> Self { let glwe_log_polynomial_sizes: Vec = DEFAUT_DOMAINS .glwe_pbs_constrained .log2_polynomial_size @@ -38,4 +40,55 @@ impl Default for SearchSpace { internal_lwe_dimensions, } } + + pub fn default_gpu_lowlat() -> Self { + // https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L156 + let glwe_log_polynomial_sizes: Vec = (9..=11).collect(); + + // https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L154 + let glwe_dimensions: Vec = vec![1]; + + let internal_lwe_dimensions: Vec = DEFAUT_DOMAINS.free_glwe.glwe_dimension.as_vec(); + + Self { + glwe_log_polynomial_sizes, + glwe_dimensions, + internal_lwe_dimensions, + } + } + + pub fn default_gpu_amortized() -> Self { + // https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L79 + let glwe_log_polynomial_sizes: Vec = (9..=13).collect(); + + // https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L78 + let glwe_dimensions: Vec = vec![1]; + + let internal_lwe_dimensions: Vec = DEFAUT_DOMAINS.free_glwe.glwe_dimension.as_vec(); + + Self { + glwe_log_polynomial_sizes, + glwe_dimensions, + internal_lwe_dimensions, + } + } + pub fn default(processing_unit: config::ProcessingUnit) -> Self { + match processing_unit { + config::ProcessingUnit::Cpu => Self::default_cpu(), + config::ProcessingUnit::Gpu { + pbs_type: GpuPbsType::Amortized, + .. + } => Self::default_gpu_amortized(), + config::ProcessingUnit::Gpu { + pbs_type: GpuPbsType::Lowlat, + .. + } => Self::default_gpu_lowlat(), + } + } } + +// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L77 +// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L153 +pub const MAX_LOG2_BASE_GPU: u64 = 16; + +pub const MAX_LOG2_BASE_CPU: u64 = 64; diff --git a/concrete-optimizer/src/optimization/dag/solo_key/optimize.rs b/concrete-optimizer/src/optimization/dag/solo_key/optimize.rs index bdf63f1a9..f209d2996 100644 --- a/concrete-optimizer/src/optimization/dag/solo_key/optimize.rs +++ b/concrete-optimizer/src/optimization/dag/solo_key/optimize.rs @@ -359,12 +359,12 @@ mod tests { use super::*; use crate::computing_cost::cpu::CpuComplexity; + use crate::config; use crate::dag::operator::{FunctionTable, Shape, Weights}; use crate::noise_estimator::p_error::repeat_p_error; - use crate::optimization::atomic_pattern; use crate::optimization::config::SearchSpace; use crate::optimization::dag::solo_key::symbolic_variance::VarianceOrigin; - use crate::optimization::decomposition; + use crate::optimization::{atomic_pattern, decomposition}; use crate::utils::square; fn small_relative_diff(v1: f64, v2: f64) -> bool { @@ -399,7 +399,7 @@ mod tests { complexity_model: &CpuComplexity::default(), }; - let search_space = SearchSpace::default(); + let search_space = SearchSpace::default_cpu(); super::optimize(dag, config, &search_space, cache) } @@ -429,7 +429,9 @@ mod tests { } fn v0_parameter_ref(precision: u64, weight: u64, times: &mut Times) { - let search_space = SearchSpace::default(); + let processing_unit = config::ProcessingUnit::Cpu; + + let search_space = SearchSpace::default(processing_unit); let sum_size = 1; @@ -440,7 +442,7 @@ mod tests { complexity_model: &CpuComplexity::default(), }; - let cache = decomposition::cache(config.security_level); + let cache = decomposition::cache(config.security_level, processing_unit, None); let _ = optimize_v0( sum_size, @@ -499,9 +501,10 @@ mod tests { } fn v0_parameter_ref_with_dot(precision: Precision, weight: i64) { + let processing_unit = config::ProcessingUnit::Cpu; let security_level = 128; - let cache = decomposition::cache(security_level); + let cache = decomposition::cache(security_level, processing_unit, None); let mut dag = unparametrized::OperationDag::new(); { @@ -530,7 +533,7 @@ mod tests { assert_f64_eq(square(weight) as f64, constraint.pareto_in_lut[0].lut_coeff); } - let search_space = SearchSpace::default(); + let search_space = SearchSpace::default(processing_unit); let config = Config { security_level, @@ -589,7 +592,8 @@ mod tests { } #[test] fn test_lut_vs_no_lut() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); for precision in 1..=8 { no_lut_vs_lut(precision, &cache); } @@ -632,7 +636,8 @@ mod tests { #[test] fn test_lut_with_input_base_noise_better_than_lut_with_lut_base_noise() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); for log_weight in 1..=16 { let weight = 1 << log_weight; for precision in 5..=9 { @@ -666,7 +671,8 @@ mod tests { #[test] fn test_lut_1_layer_is_better() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); // for some reason on 4, 5, 6, the complexity is already minimal // this could be due to pre-defined pareto set for precision in [1, 2, 3, 7, 8] { @@ -722,7 +728,8 @@ mod tests { #[test] fn test_multi_precision_dominate_single() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); let mut prev = Some(true); // true -> ... -> true -> false -> ... -> false for log2_weight in 0..29 { let weight = 1 << log2_weight; @@ -756,7 +763,8 @@ mod tests { #[test] fn test_global_p_error_input() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); for precision in [4_u8, 8] { for weight in [1, 3, 27, 243, 729] { for dim in [1, 2, 16, 32] { @@ -786,7 +794,8 @@ mod tests { #[test] fn test_global_p_error_lut() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); for precision in [4_u8, 8] { for weight in [1, 3, 27, 243, 729] { for depth in [2, 16, 32] { @@ -847,7 +856,8 @@ mod tests { #[allow(clippy::unnecessary_cast)] // clippy bug refusing as Precision on const #[test] fn test_global_p_error_dominating_lut() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); let depth = 128; let weights_low = 1; let weights_high = 1; @@ -875,7 +885,8 @@ mod tests { #[allow(clippy::unnecessary_cast)] // clippy bug refusing as Precision on const #[test] fn test_global_p_error_non_dominating_lut() { - let cache = decomposition::cache(128); + let processing_unit = config::ProcessingUnit::Cpu; + let cache = decomposition::cache(128, processing_unit, None); let depth = 128; let weights_low = 1024 * 1024 * 3; let weights_high = 1; diff --git a/concrete-optimizer/src/optimization/decomposition/blind_rotate.rs b/concrete-optimizer/src/optimization/decomposition/blind_rotate.rs index cb1f1248a..620fe9f4f 100644 --- a/concrete-optimizer/src/optimization/decomposition/blind_rotate.rs +++ b/concrete-optimizer/src/optimization/decomposition/blind_rotate.rs @@ -1,13 +1,15 @@ +use std::sync::Arc; + use serde::{Deserialize, Serialize}; use concrete_commons::dispersion::DispersionParameter; -use crate::computing_cost::operators::pbs::PbsComplexity; +use crate::computing_cost::complexity_model::ComplexityModel; use crate::noise_estimator::operators::atomic_pattern as noise_atomic_pattern; use crate::parameters::{BrDecompositionParameters, GlweParameters, LweDimension, PbsParameters}; -use crate::security; use crate::utils::cache::ephemeral::{CacheHashMap, EphemeralCache}; use crate::utils::cache::persistent::PersistentCacheHashMap; +use crate::{config, security}; use super::common::MacroParam; use super::cut::ComplexityNoise; @@ -21,10 +23,12 @@ pub struct BrComplexityNoise { /* This is stricly variance decreasing and strictly complexity increasing */ pub fn pareto_quantities( + complexity_model: &dyn ComplexityModel, ciphertext_modulus_log: u32, security_level: u64, internal_dim: u64, glwe_params: GlweParameters, + max_log2_base: u64, ) -> Vec { assert!(ciphertext_modulus_log == 64); let pbs_param = |level, log2_base| { @@ -38,24 +42,22 @@ pub fn pareto_quantities( let variance_bsk = security::glwe::minimal_variance(glwe_params, ciphertext_modulus_log, security_level); - let mut quantities = Vec::with_capacity(64); + let mut quantities = Vec::with_capacity(max_log2_base as usize); let mut increasing_complexity = 0.0; let mut decreasing_variance = f64::INFINITY; let mut counting_no_progress = 0; - let mut prev_best_log2_base = 0_u64; - let max_level = ciphertext_modulus_log as u64; - for level in 1..=max_level { + + let mut prev_best_log2_base = max_log2_base; + + for level in 1..=ciphertext_modulus_log as u64 { // detect increasing noise let mut level_decreasing_base_noise = f64::INFINITY; let mut best_log2_base = 0_u64; - let range: Vec<_> = if level == 1 { - (1..=(max_level / level)).collect() - } else { - // we know a max is between 1 and prev_best_log2_base - // and the curve has only 1 maximum close to prev_best_log2_base - // so we start on prev_best_log2_base - (1..=prev_best_log2_base).rev().collect() - }; + // we know a max is between 1 and prev_best_log2_base + // and the curve has only 1 maximum close to prev_best_log2_base + // so we start on prev_best_log2_base + let range = (1..=prev_best_log2_base).rev(); + for log2_base in range { let base_noise = noise_atomic_pattern::variance_bootstrap( pbs_param(level, log2_base), @@ -81,7 +83,7 @@ pub fn pareto_quantities( continue; } let params = pbs_param(level, best_log2_base); - let complexity_pbs = PbsComplexity::default().complexity(params, ciphertext_modulus_log); + let complexity_pbs = complexity_model.pbs_complexity(params, ciphertext_modulus_log); quantities.push(BrComplexityNoise { decomp: params.br_decomposition_parameter, @@ -118,19 +120,33 @@ impl Cache { pub type PersistDecompCache = PersistentCacheHashMap>; -pub fn cache(security_level: u64) -> PersistDecompCache { +pub fn cache( + security_level: u64, + processing_unit: config::ProcessingUnit, + complexity_model: Option>, +) -> PersistDecompCache { + let max_log2_base = processing_unit.max_br_base_log(); + let ciphertext_modulus_log = 64; let tmp: String = std::env::temp_dir() .to_str() .expect("Invalid tmp dir") .into(); - let path = format!("{tmp}/optimizer/cache/br-decomp-cpu-64-{security_level}"); + + let hardware = processing_unit.br_to_string(); + + let path = format!("{tmp}/optimizer/cache/br-decomp-{hardware}-64-{security_level}"); + + let complexity_model = complexity_model.unwrap_or_else(|| processing_unit.complexity_model()); + let function = move |(glwe_params, internal_dim): MacroParam| { pareto_quantities( + complexity_model.as_ref(), ciphertext_modulus_log, security_level, internal_dim, glwe_params, + max_log2_base, ) }; PersistentCacheHashMap::new(&path, "v0", function) diff --git a/concrete-optimizer/src/optimization/decomposition/keyswitch.rs b/concrete-optimizer/src/optimization/decomposition/keyswitch.rs index 21e9ed020..3504ab1aa 100644 --- a/concrete-optimizer/src/optimization/decomposition/keyswitch.rs +++ b/concrete-optimizer/src/optimization/decomposition/keyswitch.rs @@ -1,8 +1,11 @@ +use std::sync::Arc; + use serde::{Deserialize, Serialize}; use concrete_commons::dispersion::DispersionParameter; -use crate::computing_cost::operators::keyswitch_lwe::KsComplexity; +use crate::computing_cost::complexity_model::ComplexityModel; +use crate::config; use crate::noise_estimator::operators::atomic_pattern as noise_atomic_pattern; use crate::parameters::{ GlweParameters, KeyswitchParameters, KsDecompositionParameters, LweDimension, @@ -31,6 +34,7 @@ impl ComplexityNoise for KsComplexityNoise { /* This is stricly variance decreasing and strictly complexity increasing */ pub fn pareto_quantities( + complexity_model: &dyn ComplexityModel, ciphertext_modulus_log: u32, security_level: u64, internal_dim: u64, @@ -54,20 +58,18 @@ pub fn pareto_quantities( let mut increasing_complexity = 0.0; let mut decreasing_variance = f64::INFINITY; let mut counting_no_progress = 0; - let mut prev_best_log2_base = 0_u64; - let max_level = ciphertext_modulus_log as u64; - for level in 1..=max_level { + let mut prev_best_log2_base = ciphertext_modulus_log as u64; + + for level in 1..=ciphertext_modulus_log as u64 { // detect increasing noise let mut level_decreasing_base_noise = f64::INFINITY; let mut best_log2_base = 0_u64; - let range: Vec<_> = if level == 1 { - (1..=(max_level / level)).collect() - } else { - // we know a max is between 1 and prev_best_log2_base - // and the curve has only 1 maximum close to prev_best_log2_base - // so we start on prev_best_log2_base - (1..=prev_best_log2_base).rev().collect() - }; + + // we know a max is between 1 and prev_best_log2_base + // and the curve has only 1 maximum close to prev_best_log2_base + // so we start on prev_best_log2_base + let range = (1..=prev_best_log2_base).rev(); + for log2_base in range { let noise_keyswitch = noise_atomic_pattern::variance_keyswitch( ks_param(level, log2_base), @@ -93,7 +95,8 @@ pub fn pareto_quantities( continue; } let ks_params = ks_param(level, best_log2_base); - let complexity_keyswitch = KsComplexity.complexity(ks_params, ciphertext_modulus_log); + let complexity_keyswitch = + complexity_model.ks_complexity(ks_params, ciphertext_modulus_log); quantities.push(KsComplexityNoise { decomp: ks_params.ks_decomposition_parameter, noise: level_decreasing_base_noise, @@ -120,15 +123,26 @@ impl Cache { pub type PersistDecompCache = PersistentCacheHashMap>; -pub fn cache(security_level: u64) -> PersistDecompCache { +pub fn cache( + security_level: u64, + processing_unit: config::ProcessingUnit, + complexity_model: Option>, +) -> PersistDecompCache { let ciphertext_modulus_log = 64; let tmp: String = std::env::temp_dir() .to_str() .expect("Invalid tmp dir") .into(); - let path = format!("{tmp}/optimizer/cache/ks-decomp-cpu-64-{security_level}"); + + let hardware = processing_unit.ks_to_string(); + + let path = format!("{tmp}/optimizer/cache/ks-decomp-{hardware}-64-{security_level}"); + + let complexity_model = complexity_model.unwrap_or_else(|| processing_unit.complexity_model()); + let function = move |(glwe_params, internal_dim): MacroParam| { pareto_quantities( + complexity_model.as_ref(), ciphertext_modulus_log, security_level, internal_dim, diff --git a/concrete-optimizer/src/optimization/decomposition/mod.rs b/concrete-optimizer/src/optimization/decomposition/mod.rs index 185047fff..97bbf47a7 100644 --- a/concrete-optimizer/src/optimization/decomposition/mod.rs +++ b/concrete-optimizer/src/optimization/decomposition/mod.rs @@ -3,17 +3,30 @@ pub mod common; pub mod cut; pub mod keyswitch; +use std::sync::Arc; + pub use common::MacroParam; pub use cut::cut_complexity_noise; +use crate::computing_cost::complexity_model::ComplexityModel; +use crate::config; + pub struct PersistDecompCache { pub ks: keyswitch::PersistDecompCache, pub br: blind_rotate::PersistDecompCache, } -pub fn cache(security_level: u64) -> PersistDecompCache { +pub fn cache( + security_level: u64, + processing_unit: config::ProcessingUnit, + complexity_model: Option>, +) -> PersistDecompCache { PersistDecompCache { - ks: keyswitch::cache(security_level), - br: blind_rotate::cache(security_level), + ks: keyswitch::cache(security_level, processing_unit, complexity_model.clone()), + br: blind_rotate::cache(security_level, processing_unit, complexity_model), } } + +trait ComplexityModelClone: ComplexityModel + Clone {} + +impl ComplexityModelClone for T {} diff --git a/v0-parameters/src/lib.rs b/v0-parameters/src/lib.rs index b7df36553..7acf7cf50 100644 --- a/v0-parameters/src/lib.rs +++ b/v0-parameters/src/lib.rs @@ -10,6 +10,7 @@ use clap::Parser; use concrete_optimizer::computing_cost::cpu::CpuComplexity; +use concrete_optimizer::config; use concrete_optimizer::global_parameters::DEFAUT_DOMAINS; use concrete_optimizer::optimization::atomic_pattern::{ self as optimize_atomic_pattern, OptimizationState, @@ -84,6 +85,8 @@ pub struct Args { } pub fn all_results(args: &Args) -> Vec> { + let processing_unit = config::ProcessingUnit::Cpu; + let sum_size = args.sum_size; let maximum_acceptable_error_probability = args.p_error; let security_level = args.security_level; @@ -108,7 +111,7 @@ pub fn all_results(args: &Args) -> Vec> { complexity_model: &CpuComplexity::default(), }; - let cache = decomposition::cache(config.security_level); + let cache = decomposition::cache(config.security_level, processing_unit, None); precisions_iter .map(|precision| {