mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-08 19:44:57 -05:00
feat(optimizer): add gpu parameter constraints
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
use charts::{draw, Serie};
|
||||
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
|
||||
use concrete_optimizer::config;
|
||||
use concrete_optimizer::global_parameters::DEFAUT_DOMAINS;
|
||||
use concrete_optimizer::optimization::atomic_pattern::{self as optimize_atomic_pattern};
|
||||
use concrete_optimizer::optimization::config::{Config, SearchSpace};
|
||||
@@ -25,6 +26,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let glwe_dimensions: Vec<_> = (1..=6).collect();
|
||||
let internal_lwe_dimensions: Vec<_> = (MIN_LWE_DIM..=MAX_LWE_DIM).step_by(10).collect();
|
||||
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
|
||||
let search_space = SearchSpace {
|
||||
glwe_log_polynomial_sizes,
|
||||
glwe_dimensions,
|
||||
@@ -41,7 +44,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
complexity_model: &CpuComplexity::default(),
|
||||
};
|
||||
|
||||
let cache = decomposition::cache(security_level);
|
||||
let cache = decomposition::cache(security_level, processing_unit, None);
|
||||
|
||||
let solutions: Vec<_> = log_norm2s
|
||||
.clone()
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use charts::{draw, Serie};
|
||||
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
|
||||
use concrete_optimizer::config;
|
||||
use concrete_optimizer::global_parameters::DEFAUT_DOMAINS;
|
||||
use concrete_optimizer::optimization::atomic_pattern::{self as optimize_atomic_pattern};
|
||||
use concrete_optimizer::optimization::config::{Config, SearchSpace};
|
||||
@@ -18,6 +19,8 @@ pub const MIN_LWE_DIM: u64 = DEFAUT_DOMAINS.free_glwe.glwe_dimension.start as u6
|
||||
pub const MAX_LWE_DIM: u64 = DEFAUT_DOMAINS.free_glwe.glwe_dimension.end as u64 - 1;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
|
||||
let sum_size = 4096;
|
||||
let p_error = _4_SIGMA;
|
||||
let security_level = 128;
|
||||
@@ -41,7 +44,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
complexity_model: &CpuComplexity::default(),
|
||||
};
|
||||
|
||||
let cache = decomposition::cache(security_level);
|
||||
let cache = decomposition::cache(security_level, processing_unit, None);
|
||||
|
||||
let solutions: Vec<_> = precisions
|
||||
.clone()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
|
||||
use concrete_optimizer::config;
|
||||
use concrete_optimizer::dag::operator::{
|
||||
self, FunctionTable, LevelledComplexity, OperatorIndex, Precision, Shape,
|
||||
};
|
||||
@@ -22,6 +23,8 @@ fn no_dag_solution() -> ffi::DagSolution {
|
||||
}
|
||||
|
||||
fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options) -> ffi::Solution {
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
|
||||
let config = Config {
|
||||
security_level: options.security_level,
|
||||
maximum_acceptable_error_probability: options.maximum_acceptable_error_probability,
|
||||
@@ -31,7 +34,7 @@ fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options)
|
||||
|
||||
let sum_size = 1;
|
||||
|
||||
let search_space = SearchSpace::default();
|
||||
let search_space = SearchSpace::default(processing_unit);
|
||||
|
||||
let result = concrete_optimizer::optimization::atomic_pattern::optimize_one(
|
||||
sum_size,
|
||||
@@ -39,7 +42,7 @@ fn optimize_bootstrap(precision: u64, noise_factor: f64, options: ffi::Options)
|
||||
config,
|
||||
noise_factor,
|
||||
&search_space,
|
||||
&decomposition::cache(options.security_level),
|
||||
&decomposition::cache(options.security_level, processing_unit, None),
|
||||
);
|
||||
result
|
||||
.best_solution
|
||||
@@ -199,6 +202,7 @@ impl OperationDag {
|
||||
}
|
||||
|
||||
fn optimize_v0(&self, options: ffi::Options) -> ffi::Solution {
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let config = Config {
|
||||
security_level: options.security_level,
|
||||
maximum_acceptable_error_probability: options.maximum_acceptable_error_probability,
|
||||
@@ -206,13 +210,13 @@ impl OperationDag {
|
||||
complexity_model: &CpuComplexity::default(),
|
||||
};
|
||||
|
||||
let search_space = SearchSpace::default();
|
||||
let search_space = SearchSpace::default(processing_unit);
|
||||
|
||||
let result = concrete_optimizer::optimization::dag::solo_key::optimize::optimize(
|
||||
&self.0,
|
||||
config,
|
||||
&search_space,
|
||||
&decomposition::cache(options.security_level),
|
||||
&decomposition::cache(options.security_level, processing_unit, None),
|
||||
);
|
||||
result
|
||||
.best_solution
|
||||
@@ -220,6 +224,7 @@ impl OperationDag {
|
||||
}
|
||||
|
||||
fn optimize(&self, options: ffi::Options) -> ffi::DagSolution {
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let config = Config {
|
||||
security_level: options.security_level,
|
||||
maximum_acceptable_error_probability: options.maximum_acceptable_error_probability,
|
||||
@@ -227,8 +232,8 @@ impl OperationDag {
|
||||
complexity_model: &CpuComplexity::default(),
|
||||
};
|
||||
|
||||
let search_space = SearchSpace::default();
|
||||
let cache = decomposition::cache(options.security_level);
|
||||
let search_space = SearchSpace::default(processing_unit);
|
||||
let cache = decomposition::cache(options.security_level, processing_unit, None);
|
||||
|
||||
let result = concrete_optimizer::optimization::dag::solo_key::optimize_generic::optimize(
|
||||
&self.0,
|
||||
|
||||
@@ -2,6 +2,7 @@ use super::complexity::Complexity;
|
||||
use super::complexity_model::ComplexityModel;
|
||||
use crate::parameters::AtomicPatternParameters;
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn atomic_pattern_complexity(
|
||||
complexity_model: &dyn ComplexityModel,
|
||||
sum_size: u64,
|
||||
|
||||
@@ -4,6 +4,7 @@ use super::operators::keyswitch_lwe::KsComplexity;
|
||||
use super::operators::{keyswitch_lwe, pbs};
|
||||
use crate::parameters::{KeyswitchParameters, LweDimension, PbsParameters};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CpuComplexity {
|
||||
pub ks_lwe: keyswitch_lwe::KsComplexity,
|
||||
pub pbs: pbs::PbsComplexity,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::complexity::Complexity;
|
||||
|
||||
/** Standard fft complexity model */
|
||||
#[derive(Clone)]
|
||||
pub struct AsymptoticWithFactors {
|
||||
factor_fft: f64, // factor applied on asymptotic complexity
|
||||
factor_ifft: f64, // factor applied on asymptotic complexity
|
||||
@@ -32,14 +33,6 @@ impl Default for AsymptoticWithFactors {
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use crate::computing_cost::fft;
|
||||
use crate::computing_cost::fft::AsymptoticWithFactors;
|
||||
|
||||
/** Standard fft complexity with X factors*/
|
||||
pub const COST_AWS: AsymptoticWithFactors = AsymptoticWithFactors {
|
||||
// https://github.com/zama-ai/concrete-optimizer/blob/prototype/python/optimizer/noise_formulas/bootstrap.py#L150
|
||||
factor_fft: 0.202_926_951_153_089_17,
|
||||
factor_ifft: 0.407_795_078_512_891,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn golden_python_prototype() {
|
||||
|
||||
@@ -4,125 +4,52 @@ use crate::parameters::{KeyswitchParameters, LweDimension, PbsParameters};
|
||||
use crate::utils::square;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct GpuPbsComplexity {
|
||||
pub w1: f64,
|
||||
pub w2: f64,
|
||||
pub w3: f64,
|
||||
pub w4: f64,
|
||||
pub occupancy: f64,
|
||||
}
|
||||
|
||||
//https://github.com/zama-ai/concrete-core-internal/issues/91
|
||||
impl GpuPbsComplexity {
|
||||
pub fn default_lowlat_u64(occupancy: f64) -> Self {
|
||||
Self {
|
||||
w1: 2_576.105_013_4,
|
||||
w2: -21_631.382_229_52,
|
||||
w3: -86_525.527_535_17,
|
||||
w4: 0.125_472_398_538_904_43,
|
||||
occupancy,
|
||||
}
|
||||
}
|
||||
pub enum GpuPbsComplexity {
|
||||
Lowlat,
|
||||
Amortized,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct GpuKsComplexity {
|
||||
pub w1: f64,
|
||||
pub w2: f64,
|
||||
pub w3: f64,
|
||||
pub w4: f64,
|
||||
pub occupancy: f64,
|
||||
pub number_of_sm: u64,
|
||||
}
|
||||
|
||||
// https://github.com/zama-ai/concrete-core-internal/issues/90
|
||||
impl GpuKsComplexity {
|
||||
pub fn default_u64(occupancy: f64, number_of_sm: u64) -> Self {
|
||||
Self {
|
||||
w1: 7_959.869_676_54,
|
||||
w2: 3_866.817_732_87,
|
||||
w3: 8_353.484_127_44,
|
||||
w4: 0.125_472_398_538_904_43,
|
||||
occupancy,
|
||||
number_of_sm,
|
||||
}
|
||||
}
|
||||
}
|
||||
pub struct GpuKsComplexity;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct GpuComplexity {
|
||||
pub ks: GpuKsComplexity,
|
||||
pub pbs: GpuPbsComplexity,
|
||||
pub ncores: u64,
|
||||
pub number_of_sm: u64,
|
||||
}
|
||||
|
||||
impl GpuComplexity {
|
||||
pub fn default_lowlat_u64(number_of_sm: u64) -> Self {
|
||||
Self {
|
||||
ks: GpuKsComplexity,
|
||||
pbs: GpuPbsComplexity::Lowlat,
|
||||
number_of_sm,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_amortized_u64(number_of_sm: u64) -> Self {
|
||||
Self {
|
||||
ks: GpuKsComplexity,
|
||||
pbs: GpuPbsComplexity::Amortized,
|
||||
number_of_sm,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ComplexityModel for GpuComplexity {
|
||||
#[allow(clippy::let_and_return, non_snake_case)]
|
||||
fn pbs_complexity(&self, params: PbsParameters, _ciphertext_modulus_log: u32) -> Complexity {
|
||||
let GpuPbsComplexity {
|
||||
w1,
|
||||
w2,
|
||||
w3,
|
||||
w4,
|
||||
occupancy,
|
||||
} = self.pbs;
|
||||
|
||||
let n = params.internal_lwe_dimension.0 as f64;
|
||||
let k = params.output_glwe_params.glwe_dimension as f64;
|
||||
let N = (1 << params.output_glwe_params.log2_polynomial_size) as f64;
|
||||
|
||||
let ell = params.br_decomposition_parameter.level as f64;
|
||||
|
||||
let number_of_ct = 1.;
|
||||
|
||||
let number_of_operations = number_of_ct * algorithmic_complexity_pbs(n, k, N, ell);
|
||||
|
||||
let size = std::mem::size_of::<u64>() as f64;
|
||||
|
||||
let pbs_cost = w4 * number_of_operations / (self.ncores as f64 * occupancy)
|
||||
+ (w1 * n * (2. + ell * N * square(k + 1.))
|
||||
+ 2. * N * ell * (w2 + w3 * square(k + 1.)))
|
||||
* size;
|
||||
|
||||
pbs_cost
|
||||
fn pbs_complexity(&self, _params: PbsParameters, _ciphertext_modulus_log: u32) -> Complexity {
|
||||
todo!()
|
||||
}
|
||||
|
||||
#[allow(clippy::let_and_return)]
|
||||
fn ks_complexity(
|
||||
&self,
|
||||
params: KeyswitchParameters,
|
||||
ciphertext_modulus_log: u32,
|
||||
_params: KeyswitchParameters,
|
||||
_ciphertext_modulus_log: u32,
|
||||
) -> Complexity {
|
||||
let GpuKsComplexity {
|
||||
w1,
|
||||
w2,
|
||||
w3,
|
||||
w4,
|
||||
occupancy,
|
||||
number_of_sm,
|
||||
} = self.ks;
|
||||
|
||||
let na = params.input_lwe_dimension.0 as f64;
|
||||
|
||||
let nb = params.output_lwe_dimension.0 as f64;
|
||||
|
||||
let ell = params.ks_decomposition_parameter.level as f64;
|
||||
|
||||
let number_of_ct = 1.;
|
||||
|
||||
let number_of_operations =
|
||||
number_of_ct * algorithmic_complexity_ks(na, nb, ell, ciphertext_modulus_log as f64);
|
||||
|
||||
let size = std::mem::size_of::<u64>() as f64;
|
||||
|
||||
let ks_cost = w4 * number_of_operations / (self.ncores as f64 * occupancy)
|
||||
+ w1 * (number_of_ct * ((na + 1.) + (nb + 1.)) + ell * (nb + 1.) * na) * size
|
||||
+ w2 * number_of_ct * nb * size
|
||||
+ w3 * (number_of_ct / number_of_ct.min(number_of_sm as f64 * 12.)).ceil()
|
||||
* ((na + 1.) + (nb + 1.))
|
||||
+ ell * (nb + 1.) * size;
|
||||
|
||||
ks_cost
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn levelled_complexity(
|
||||
@@ -136,6 +63,7 @@ impl ComplexityModel for GpuComplexity {
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
#[allow(dead_code)]
|
||||
fn algorithmic_complexity_pbs(n: f64, k: f64, N: f64, ell: f64) -> f64 {
|
||||
n * (ell * (k + 1.) * N * (N.log2() + 1.)
|
||||
+ (k + 1.) * N * (N.log2() + 1.)
|
||||
@@ -143,6 +71,7 @@ fn algorithmic_complexity_pbs(n: f64, k: f64, N: f64, ell: f64) -> f64 {
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
#[allow(dead_code)]
|
||||
fn algorithmic_complexity_ks(na: f64, nb: f64, ell: f64, log2_q: f64) -> f64 {
|
||||
na * nb * ell * log2_q
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
pub mod atomic_pattern;
|
||||
mod atomic_pattern;
|
||||
pub mod complexity;
|
||||
pub mod complexity_model;
|
||||
pub mod cpu;
|
||||
pub mod fft;
|
||||
mod fft;
|
||||
pub mod gpu;
|
||||
pub mod operators;
|
||||
|
||||
@@ -3,6 +3,7 @@ use super::super::fft;
|
||||
use crate::parameters::CmuxParameters;
|
||||
use crate::utils::square;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SimpleWithFactors {
|
||||
fft: fft::AsymptoticWithFactors,
|
||||
blind_rotate_factor: f64,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::super::complexity::Complexity;
|
||||
use crate::parameters::KeyswitchParameters;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct KsComplexity;
|
||||
|
||||
impl KsComplexity {
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
pub mod cmux;
|
||||
pub mod keyswitch_lwe;
|
||||
pub mod pbs;
|
||||
pub(super) mod keyswitch_lwe;
|
||||
pub(super) mod pbs;
|
||||
|
||||
@@ -2,7 +2,7 @@ use super::super::complexity::Complexity;
|
||||
use super::cmux;
|
||||
use crate::parameters::PbsParameters;
|
||||
|
||||
#[derive(Default)]
|
||||
#[derive(Default, Clone)]
|
||||
pub struct PbsComplexity {
|
||||
pub cmux: cmux::SimpleWithFactors,
|
||||
}
|
||||
|
||||
63
concrete-optimizer/src/config.rs
Normal file
63
concrete-optimizer/src/config.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::computing_cost::complexity_model::ComplexityModel;
|
||||
use crate::computing_cost::cpu::CpuComplexity;
|
||||
use crate::computing_cost::gpu::GpuComplexity;
|
||||
use crate::optimization::config::{MAX_LOG2_BASE_CPU, MAX_LOG2_BASE_GPU};
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum ProcessingUnit {
|
||||
Cpu,
|
||||
Gpu {
|
||||
pbs_type: GpuPbsType,
|
||||
number_of_sm: u64,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum GpuPbsType {
|
||||
Lowlat,
|
||||
Amortized,
|
||||
}
|
||||
|
||||
impl ProcessingUnit {
|
||||
pub fn max_br_base_log(self) -> u64 {
|
||||
match self {
|
||||
Self::Cpu => MAX_LOG2_BASE_CPU,
|
||||
Self::Gpu { .. } => MAX_LOG2_BASE_GPU,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ks_to_string(self) -> &'static str {
|
||||
match self {
|
||||
Self::Cpu => "cpu",
|
||||
Self::Gpu { .. } => "gpu",
|
||||
}
|
||||
}
|
||||
pub fn br_to_string(self) -> &'static str {
|
||||
match self {
|
||||
Self::Cpu => "cpu",
|
||||
Self::Gpu {
|
||||
pbs_type: GpuPbsType::Lowlat,
|
||||
..
|
||||
} => "gpu_lowlat",
|
||||
Self::Gpu {
|
||||
pbs_type: GpuPbsType::Amortized,
|
||||
..
|
||||
} => "gpu_amortized",
|
||||
}
|
||||
}
|
||||
pub fn complexity_model(self) -> Arc<dyn ComplexityModel> {
|
||||
match self {
|
||||
Self::Cpu => Arc::new(CpuComplexity::default()),
|
||||
Self::Gpu {
|
||||
pbs_type: GpuPbsType::Amortized,
|
||||
number_of_sm,
|
||||
} => Arc::new(GpuComplexity::default_amortized_u64(number_of_sm)),
|
||||
Self::Gpu {
|
||||
pbs_type: GpuPbsType::Lowlat,
|
||||
number_of_sm,
|
||||
} => Arc::new(GpuComplexity::default_lowlat_u64(number_of_sm)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@
|
||||
|
||||
pub mod computing_cost;
|
||||
|
||||
pub mod config;
|
||||
pub mod dag;
|
||||
pub mod global_parameters;
|
||||
pub mod noise_estimator;
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use crate::computing_cost::complexity_model::ComplexityModel;
|
||||
use crate::config;
|
||||
use crate::config::GpuPbsType;
|
||||
use crate::global_parameters::DEFAUT_DOMAINS;
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
@@ -23,8 +25,8 @@ pub struct SearchSpace {
|
||||
pub internal_lwe_dimensions: Vec<u64>,
|
||||
}
|
||||
|
||||
impl Default for SearchSpace {
|
||||
fn default() -> Self {
|
||||
impl SearchSpace {
|
||||
pub fn default_cpu() -> Self {
|
||||
let glwe_log_polynomial_sizes: Vec<u64> = DEFAUT_DOMAINS
|
||||
.glwe_pbs_constrained
|
||||
.log2_polynomial_size
|
||||
@@ -38,4 +40,55 @@ impl Default for SearchSpace {
|
||||
internal_lwe_dimensions,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_gpu_lowlat() -> Self {
|
||||
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L156
|
||||
let glwe_log_polynomial_sizes: Vec<u64> = (9..=11).collect();
|
||||
|
||||
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L154
|
||||
let glwe_dimensions: Vec<u64> = vec![1];
|
||||
|
||||
let internal_lwe_dimensions: Vec<u64> = DEFAUT_DOMAINS.free_glwe.glwe_dimension.as_vec();
|
||||
|
||||
Self {
|
||||
glwe_log_polynomial_sizes,
|
||||
glwe_dimensions,
|
||||
internal_lwe_dimensions,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_gpu_amortized() -> Self {
|
||||
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L79
|
||||
let glwe_log_polynomial_sizes: Vec<u64> = (9..=13).collect();
|
||||
|
||||
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L78
|
||||
let glwe_dimensions: Vec<u64> = vec![1];
|
||||
|
||||
let internal_lwe_dimensions: Vec<u64> = DEFAUT_DOMAINS.free_glwe.glwe_dimension.as_vec();
|
||||
|
||||
Self {
|
||||
glwe_log_polynomial_sizes,
|
||||
glwe_dimensions,
|
||||
internal_lwe_dimensions,
|
||||
}
|
||||
}
|
||||
pub fn default(processing_unit: config::ProcessingUnit) -> Self {
|
||||
match processing_unit {
|
||||
config::ProcessingUnit::Cpu => Self::default_cpu(),
|
||||
config::ProcessingUnit::Gpu {
|
||||
pbs_type: GpuPbsType::Amortized,
|
||||
..
|
||||
} => Self::default_gpu_amortized(),
|
||||
config::ProcessingUnit::Gpu {
|
||||
pbs_type: GpuPbsType::Lowlat,
|
||||
..
|
||||
} => Self::default_gpu_lowlat(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_amortized.cu#L77
|
||||
// https://github.com/zama-ai/concrete-core/blob/6b52182ab44c4b39ddebca1c457e1096fb687801/concrete-cuda/cuda/src/bootstrap_low_latency.cu#L153
|
||||
pub const MAX_LOG2_BASE_GPU: u64 = 16;
|
||||
|
||||
pub const MAX_LOG2_BASE_CPU: u64 = 64;
|
||||
|
||||
@@ -359,12 +359,12 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::computing_cost::cpu::CpuComplexity;
|
||||
use crate::config;
|
||||
use crate::dag::operator::{FunctionTable, Shape, Weights};
|
||||
use crate::noise_estimator::p_error::repeat_p_error;
|
||||
use crate::optimization::atomic_pattern;
|
||||
use crate::optimization::config::SearchSpace;
|
||||
use crate::optimization::dag::solo_key::symbolic_variance::VarianceOrigin;
|
||||
use crate::optimization::decomposition;
|
||||
use crate::optimization::{atomic_pattern, decomposition};
|
||||
use crate::utils::square;
|
||||
|
||||
fn small_relative_diff(v1: f64, v2: f64) -> bool {
|
||||
@@ -399,7 +399,7 @@ mod tests {
|
||||
complexity_model: &CpuComplexity::default(),
|
||||
};
|
||||
|
||||
let search_space = SearchSpace::default();
|
||||
let search_space = SearchSpace::default_cpu();
|
||||
|
||||
super::optimize(dag, config, &search_space, cache)
|
||||
}
|
||||
@@ -429,7 +429,9 @@ mod tests {
|
||||
}
|
||||
|
||||
fn v0_parameter_ref(precision: u64, weight: u64, times: &mut Times) {
|
||||
let search_space = SearchSpace::default();
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
|
||||
let search_space = SearchSpace::default(processing_unit);
|
||||
|
||||
let sum_size = 1;
|
||||
|
||||
@@ -440,7 +442,7 @@ mod tests {
|
||||
complexity_model: &CpuComplexity::default(),
|
||||
};
|
||||
|
||||
let cache = decomposition::cache(config.security_level);
|
||||
let cache = decomposition::cache(config.security_level, processing_unit, None);
|
||||
|
||||
let _ = optimize_v0(
|
||||
sum_size,
|
||||
@@ -499,9 +501,10 @@ mod tests {
|
||||
}
|
||||
|
||||
fn v0_parameter_ref_with_dot(precision: Precision, weight: i64) {
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let security_level = 128;
|
||||
|
||||
let cache = decomposition::cache(security_level);
|
||||
let cache = decomposition::cache(security_level, processing_unit, None);
|
||||
|
||||
let mut dag = unparametrized::OperationDag::new();
|
||||
{
|
||||
@@ -530,7 +533,7 @@ mod tests {
|
||||
assert_f64_eq(square(weight) as f64, constraint.pareto_in_lut[0].lut_coeff);
|
||||
}
|
||||
|
||||
let search_space = SearchSpace::default();
|
||||
let search_space = SearchSpace::default(processing_unit);
|
||||
|
||||
let config = Config {
|
||||
security_level,
|
||||
@@ -589,7 +592,8 @@ mod tests {
|
||||
}
|
||||
#[test]
|
||||
fn test_lut_vs_no_lut() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
for precision in 1..=8 {
|
||||
no_lut_vs_lut(precision, &cache);
|
||||
}
|
||||
@@ -632,7 +636,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_lut_with_input_base_noise_better_than_lut_with_lut_base_noise() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
for log_weight in 1..=16 {
|
||||
let weight = 1 << log_weight;
|
||||
for precision in 5..=9 {
|
||||
@@ -666,7 +671,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_lut_1_layer_is_better() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
// for some reason on 4, 5, 6, the complexity is already minimal
|
||||
// this could be due to pre-defined pareto set
|
||||
for precision in [1, 2, 3, 7, 8] {
|
||||
@@ -722,7 +728,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_multi_precision_dominate_single() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
let mut prev = Some(true); // true -> ... -> true -> false -> ... -> false
|
||||
for log2_weight in 0..29 {
|
||||
let weight = 1 << log2_weight;
|
||||
@@ -756,7 +763,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_global_p_error_input() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
for precision in [4_u8, 8] {
|
||||
for weight in [1, 3, 27, 243, 729] {
|
||||
for dim in [1, 2, 16, 32] {
|
||||
@@ -786,7 +794,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_global_p_error_lut() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
for precision in [4_u8, 8] {
|
||||
for weight in [1, 3, 27, 243, 729] {
|
||||
for depth in [2, 16, 32] {
|
||||
@@ -847,7 +856,8 @@ mod tests {
|
||||
#[allow(clippy::unnecessary_cast)] // clippy bug refusing as Precision on const
|
||||
#[test]
|
||||
fn test_global_p_error_dominating_lut() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
let depth = 128;
|
||||
let weights_low = 1;
|
||||
let weights_high = 1;
|
||||
@@ -875,7 +885,8 @@ mod tests {
|
||||
#[allow(clippy::unnecessary_cast)] // clippy bug refusing as Precision on const
|
||||
#[test]
|
||||
fn test_global_p_error_non_dominating_lut() {
|
||||
let cache = decomposition::cache(128);
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
let cache = decomposition::cache(128, processing_unit, None);
|
||||
let depth = 128;
|
||||
let weights_low = 1024 * 1024 * 3;
|
||||
let weights_high = 1;
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use concrete_commons::dispersion::DispersionParameter;
|
||||
|
||||
use crate::computing_cost::operators::pbs::PbsComplexity;
|
||||
use crate::computing_cost::complexity_model::ComplexityModel;
|
||||
use crate::noise_estimator::operators::atomic_pattern as noise_atomic_pattern;
|
||||
use crate::parameters::{BrDecompositionParameters, GlweParameters, LweDimension, PbsParameters};
|
||||
use crate::security;
|
||||
use crate::utils::cache::ephemeral::{CacheHashMap, EphemeralCache};
|
||||
use crate::utils::cache::persistent::PersistentCacheHashMap;
|
||||
use crate::{config, security};
|
||||
|
||||
use super::common::MacroParam;
|
||||
use super::cut::ComplexityNoise;
|
||||
@@ -21,10 +23,12 @@ pub struct BrComplexityNoise {
|
||||
|
||||
/* This is strictly variance decreasing and strictly complexity increasing */
|
||||
pub fn pareto_quantities(
|
||||
complexity_model: &dyn ComplexityModel,
|
||||
ciphertext_modulus_log: u32,
|
||||
security_level: u64,
|
||||
internal_dim: u64,
|
||||
glwe_params: GlweParameters,
|
||||
max_log2_base: u64,
|
||||
) -> Vec<BrComplexityNoise> {
|
||||
assert!(ciphertext_modulus_log == 64);
|
||||
let pbs_param = |level, log2_base| {
|
||||
@@ -38,24 +42,22 @@ pub fn pareto_quantities(
|
||||
let variance_bsk =
|
||||
security::glwe::minimal_variance(glwe_params, ciphertext_modulus_log, security_level);
|
||||
|
||||
let mut quantities = Vec::with_capacity(64);
|
||||
let mut quantities = Vec::with_capacity(max_log2_base as usize);
|
||||
let mut increasing_complexity = 0.0;
|
||||
let mut decreasing_variance = f64::INFINITY;
|
||||
let mut counting_no_progress = 0;
|
||||
let mut prev_best_log2_base = 0_u64;
|
||||
let max_level = ciphertext_modulus_log as u64;
|
||||
for level in 1..=max_level {
|
||||
|
||||
let mut prev_best_log2_base = max_log2_base;
|
||||
|
||||
for level in 1..=ciphertext_modulus_log as u64 {
|
||||
// detect increasing noise
|
||||
let mut level_decreasing_base_noise = f64::INFINITY;
|
||||
let mut best_log2_base = 0_u64;
|
||||
let range: Vec<_> = if level == 1 {
|
||||
(1..=(max_level / level)).collect()
|
||||
} else {
|
||||
// we know a max is between 1 and prev_best_log2_base
|
||||
// and the curve has only 1 maximum close to prev_best_log2_base
|
||||
// so we start on prev_best_log2_base
|
||||
(1..=prev_best_log2_base).rev().collect()
|
||||
};
|
||||
// we know a max is between 1 and prev_best_log2_base
|
||||
// and the curve has only 1 maximum close to prev_best_log2_base
|
||||
// so we start on prev_best_log2_base
|
||||
let range = (1..=prev_best_log2_base).rev();
|
||||
|
||||
for log2_base in range {
|
||||
let base_noise = noise_atomic_pattern::variance_bootstrap(
|
||||
pbs_param(level, log2_base),
|
||||
@@ -81,7 +83,7 @@ pub fn pareto_quantities(
|
||||
continue;
|
||||
}
|
||||
let params = pbs_param(level, best_log2_base);
|
||||
let complexity_pbs = PbsComplexity::default().complexity(params, ciphertext_modulus_log);
|
||||
let complexity_pbs = complexity_model.pbs_complexity(params, ciphertext_modulus_log);
|
||||
|
||||
quantities.push(BrComplexityNoise {
|
||||
decomp: params.br_decomposition_parameter,
|
||||
@@ -118,19 +120,33 @@ impl Cache {
|
||||
|
||||
pub type PersistDecompCache = PersistentCacheHashMap<MacroParam, Vec<BrComplexityNoise>>;
|
||||
|
||||
pub fn cache(security_level: u64) -> PersistDecompCache {
|
||||
pub fn cache(
|
||||
security_level: u64,
|
||||
processing_unit: config::ProcessingUnit,
|
||||
complexity_model: Option<Arc<dyn ComplexityModel>>,
|
||||
) -> PersistDecompCache {
|
||||
let max_log2_base = processing_unit.max_br_base_log();
|
||||
|
||||
let ciphertext_modulus_log = 64;
|
||||
let tmp: String = std::env::temp_dir()
|
||||
.to_str()
|
||||
.expect("Invalid tmp dir")
|
||||
.into();
|
||||
let path = format!("{tmp}/optimizer/cache/br-decomp-cpu-64-{security_level}");
|
||||
|
||||
let hardware = processing_unit.br_to_string();
|
||||
|
||||
let path = format!("{tmp}/optimizer/cache/br-decomp-{hardware}-64-{security_level}");
|
||||
|
||||
let complexity_model = complexity_model.unwrap_or_else(|| processing_unit.complexity_model());
|
||||
|
||||
let function = move |(glwe_params, internal_dim): MacroParam| {
|
||||
pareto_quantities(
|
||||
complexity_model.as_ref(),
|
||||
ciphertext_modulus_log,
|
||||
security_level,
|
||||
internal_dim,
|
||||
glwe_params,
|
||||
max_log2_base,
|
||||
)
|
||||
};
|
||||
PersistentCacheHashMap::new(&path, "v0", function)
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use concrete_commons::dispersion::DispersionParameter;
|
||||
|
||||
use crate::computing_cost::operators::keyswitch_lwe::KsComplexity;
|
||||
use crate::computing_cost::complexity_model::ComplexityModel;
|
||||
use crate::config;
|
||||
use crate::noise_estimator::operators::atomic_pattern as noise_atomic_pattern;
|
||||
use crate::parameters::{
|
||||
GlweParameters, KeyswitchParameters, KsDecompositionParameters, LweDimension,
|
||||
@@ -31,6 +34,7 @@ impl ComplexityNoise for KsComplexityNoise {
|
||||
|
||||
/* This is strictly variance decreasing and strictly complexity increasing */
|
||||
pub fn pareto_quantities(
|
||||
complexity_model: &dyn ComplexityModel,
|
||||
ciphertext_modulus_log: u32,
|
||||
security_level: u64,
|
||||
internal_dim: u64,
|
||||
@@ -54,20 +58,18 @@ pub fn pareto_quantities(
|
||||
let mut increasing_complexity = 0.0;
|
||||
let mut decreasing_variance = f64::INFINITY;
|
||||
let mut counting_no_progress = 0;
|
||||
let mut prev_best_log2_base = 0_u64;
|
||||
let max_level = ciphertext_modulus_log as u64;
|
||||
for level in 1..=max_level {
|
||||
let mut prev_best_log2_base = ciphertext_modulus_log as u64;
|
||||
|
||||
for level in 1..=ciphertext_modulus_log as u64 {
|
||||
// detect increasing noise
|
||||
let mut level_decreasing_base_noise = f64::INFINITY;
|
||||
let mut best_log2_base = 0_u64;
|
||||
let range: Vec<_> = if level == 1 {
|
||||
(1..=(max_level / level)).collect()
|
||||
} else {
|
||||
// we know a max is between 1 and prev_best_log2_base
|
||||
// and the curve has only 1 maximum close to prev_best_log2_base
|
||||
// so we start on prev_best_log2_base
|
||||
(1..=prev_best_log2_base).rev().collect()
|
||||
};
|
||||
|
||||
// we know a max is between 1 and prev_best_log2_base
|
||||
// and the curve has only 1 maximum close to prev_best_log2_base
|
||||
// so we start on prev_best_log2_base
|
||||
let range = (1..=prev_best_log2_base).rev();
|
||||
|
||||
for log2_base in range {
|
||||
let noise_keyswitch = noise_atomic_pattern::variance_keyswitch(
|
||||
ks_param(level, log2_base),
|
||||
@@ -93,7 +95,8 @@ pub fn pareto_quantities(
|
||||
continue;
|
||||
}
|
||||
let ks_params = ks_param(level, best_log2_base);
|
||||
let complexity_keyswitch = KsComplexity.complexity(ks_params, ciphertext_modulus_log);
|
||||
let complexity_keyswitch =
|
||||
complexity_model.ks_complexity(ks_params, ciphertext_modulus_log);
|
||||
quantities.push(KsComplexityNoise {
|
||||
decomp: ks_params.ks_decomposition_parameter,
|
||||
noise: level_decreasing_base_noise,
|
||||
@@ -120,15 +123,26 @@ impl Cache {
|
||||
|
||||
pub type PersistDecompCache = PersistentCacheHashMap<MacroParam, Vec<KsComplexityNoise>>;
|
||||
|
||||
pub fn cache(security_level: u64) -> PersistDecompCache {
|
||||
pub fn cache(
|
||||
security_level: u64,
|
||||
processing_unit: config::ProcessingUnit,
|
||||
complexity_model: Option<Arc<dyn ComplexityModel>>,
|
||||
) -> PersistDecompCache {
|
||||
let ciphertext_modulus_log = 64;
|
||||
let tmp: String = std::env::temp_dir()
|
||||
.to_str()
|
||||
.expect("Invalid tmp dir")
|
||||
.into();
|
||||
let path = format!("{tmp}/optimizer/cache/ks-decomp-cpu-64-{security_level}");
|
||||
|
||||
let hardware = processing_unit.ks_to_string();
|
||||
|
||||
let path = format!("{tmp}/optimizer/cache/ks-decomp-{hardware}-64-{security_level}");
|
||||
|
||||
let complexity_model = complexity_model.unwrap_or_else(|| processing_unit.complexity_model());
|
||||
|
||||
let function = move |(glwe_params, internal_dim): MacroParam| {
|
||||
pareto_quantities(
|
||||
complexity_model.as_ref(),
|
||||
ciphertext_modulus_log,
|
||||
security_level,
|
||||
internal_dim,
|
||||
|
||||
@@ -3,17 +3,30 @@ pub mod common;
|
||||
pub mod cut;
|
||||
pub mod keyswitch;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use common::MacroParam;
|
||||
pub use cut::cut_complexity_noise;
|
||||
|
||||
use crate::computing_cost::complexity_model::ComplexityModel;
|
||||
use crate::config;
|
||||
|
||||
pub struct PersistDecompCache {
|
||||
pub ks: keyswitch::PersistDecompCache,
|
||||
pub br: blind_rotate::PersistDecompCache,
|
||||
}
|
||||
|
||||
pub fn cache(security_level: u64) -> PersistDecompCache {
|
||||
pub fn cache(
|
||||
security_level: u64,
|
||||
processing_unit: config::ProcessingUnit,
|
||||
complexity_model: Option<Arc<dyn ComplexityModel>>,
|
||||
) -> PersistDecompCache {
|
||||
PersistDecompCache {
|
||||
ks: keyswitch::cache(security_level),
|
||||
br: blind_rotate::cache(security_level),
|
||||
ks: keyswitch::cache(security_level, processing_unit, complexity_model.clone()),
|
||||
br: blind_rotate::cache(security_level, processing_unit, complexity_model),
|
||||
}
|
||||
}
|
||||
|
||||
trait ComplexityModelClone: ComplexityModel + Clone {}
|
||||
|
||||
impl<T: ComplexityModel + Clone> ComplexityModelClone for T {}
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
use clap::Parser;
|
||||
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
|
||||
use concrete_optimizer::config;
|
||||
use concrete_optimizer::global_parameters::DEFAUT_DOMAINS;
|
||||
use concrete_optimizer::optimization::atomic_pattern::{
|
||||
self as optimize_atomic_pattern, OptimizationState,
|
||||
@@ -84,6 +85,8 @@ pub struct Args {
|
||||
}
|
||||
|
||||
pub fn all_results(args: &Args) -> Vec<Vec<OptimizationState>> {
|
||||
let processing_unit = config::ProcessingUnit::Cpu;
|
||||
|
||||
let sum_size = args.sum_size;
|
||||
let maximum_acceptable_error_probability = args.p_error;
|
||||
let security_level = args.security_level;
|
||||
@@ -108,7 +111,7 @@ pub fn all_results(args: &Args) -> Vec<Vec<OptimizationState>> {
|
||||
complexity_model: &CpuComplexity::default(),
|
||||
};
|
||||
|
||||
let cache = decomposition::cache(config.security_level);
|
||||
let cache = decomposition::cache(config.security_level, processing_unit, None);
|
||||
|
||||
precisions_iter
|
||||
.map(|precision| {
|
||||
|
||||
Reference in New Issue
Block a user