feat(hpu): Add Hpu backend implementation

This backend abstracts communication with the Hpu Fpga hardware.
It defines its own entities to prevent circular dependencies with
tfhe-rs.
Object lifetime is handled through an Arc<Mutex<T>> wrapper, which
enforces that all objects currently alive in Hpu Hw are also kept valid
on the host side.

It contains the second version of HPU instruction set (HIS_V2.0):
* DOps have the following properties:
  + Template as first class citizen
  + Support of Immediate template
  + Direct parser and conversion between Asm/Hex
  + Replace deku (and its associated endianness limitation) with
    bitfield_struct and manual parsing

* IOps have the following properties:
  + Support for a variable number of Destinations
  + Support for a variable number of Sources
  + Support for a variable number of Immediate values
  + Support for multiple bitwidths (not implemented yet in the Fpga
    firmware)

Details can be found in `backends/tfhe-hpu-backend/Readme.md`
This commit is contained in:
Baptiste Roux
2025-05-16 14:15:38 +02:00
committed by B. Roux
parent a7d8d2b1d4
commit 9ee8259002
301 changed files with 46112 additions and 461 deletions

View File

@@ -0,0 +1,44 @@
# Hpu parameter set: 44-bit ciphertexts with Gaussian noise.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 724
glwe_dimension = 2
polynomial_size = 1024
lwe_noise_distribution = { GaussianStdDev = 1.2597809688976277e-05 }
glwe_noise_distribution = { GaussianStdDev = 2.2737367544323206e-13 }
pbs_base_log = 20
pbs_level = 1
ks_base_log = 2
ks_level = 7
message_width = 2
carry_width = 2
ciphertext_width = 44

[ntt_params]
core_arch = "WmmUnfoldPcg"
min_pbs_nb = 10
batch_pbs_nb = 16
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (44) -- presumably
# the two must stay in sync; confirm.
ct_width = 44
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^10 = 1024) -- confirm
# whether this relation is required.
stg_nb = 10
prime_modulus = "Solinas2_44_14"
psi = 32
delta = 5

[ks_params]
width = 21
lbx = 2
lby = 32
lbz = 3

[pc_params]
ksk_pc = 4
ksk_bytes_w = 64
bsk_pc = 4
bsk_bytes_w = 64
pem_pc = 2
pem_bytes_w = 64
glwe_bytes_w = 64

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64

View File

@@ -0,0 +1,44 @@
# Hpu parameter set: 44-bit ciphertexts, both noise distributions set to a
# Gaussian with standard deviation 0.0.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 20
glwe_dimension = 2
polynomial_size = 1024
lwe_noise_distribution = { GaussianStdDev = 0.0 }
glwe_noise_distribution = { GaussianStdDev = 0.0 }
pbs_base_log = 20
pbs_level = 1
ks_base_log = 2
ks_level = 7
message_width = 2
carry_width = 2
ciphertext_width = 44

[ntt_params]
core_arch = "WmmCompactPcg"
min_pbs_nb = 10
batch_pbs_nb = 16
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (44) -- presumably
# the two must stay in sync; confirm.
ct_width = 44
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^10 = 1024) -- confirm
# whether this relation is required.
stg_nb = 10
prime_modulus = "Solinas2_44_14"
psi = 32
delta = 5

[ks_params]
width = 21
lbx = 2
lby = 32
lbz = 3

[pc_params]
ksk_pc = 4
ksk_bytes_w = 64
bsk_pc = 4
bsk_bytes_w = 64
pem_pc = 2
pem_bytes_w = 64
glwe_bytes_w = 64

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64

View File

@@ -0,0 +1,47 @@
# Hpu parameter set: 64-bit ciphertexts with Gaussian noise, GF64 modulus.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 724
glwe_dimension = 2
polynomial_size = 1024
lwe_noise_distribution = { GaussianStdDev = 1.2597809688976277e-05 }
glwe_noise_distribution = { GaussianStdDev = 2.2737367544323206e-13 }
pbs_base_log = 20
pbs_level = 1
ks_base_log = 2
ks_level = 7
message_width = 2
carry_width = 2
ciphertext_width = 64

[ntt_params]
# NOTE(review): the two GF64 entries sum to stg_nb (5 + 5 = 10) --
# presumably a split of the stages; confirm.
core_arch = { GF64 = [5, 5] }
min_pbs_nb = 10
batch_pbs_nb = 12
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (64) -- presumably
# the two must stay in sync; confirm.
ct_width = 64
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^10 = 1024) -- confirm
# whether this relation is required.
stg_nb = 10
prime_modulus = "GF64"
psi = 32
delta = 5

[ks_params]
width = 21
lbx = 2
lby = 32
lbz = 3

[pc_params]
ksk_pc = 4
ksk_bytes_w = 32
bsk_pc = 4
bsk_bytes_w = 32
pem_pc = 2
pem_bytes_w = 32
glwe_bytes_w = 32

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64

View File

@@ -0,0 +1,44 @@
# Hpu parameter set: 64-bit ciphertexts, GF64 modulus, both noise
# distributions set to a Gaussian with standard deviation 0.0.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 20
glwe_dimension = 2
polynomial_size = 1024
lwe_noise_distribution = { GaussianStdDev = 0.0 }
glwe_noise_distribution = { GaussianStdDev = 0.0 }
pbs_base_log = 20
pbs_level = 1
ks_base_log = 2
ks_level = 7
message_width = 2
carry_width = 2
ciphertext_width = 64

[ntt_params]
# NOTE(review): the two GF64 entries sum to stg_nb (5 + 5 = 10) --
# presumably a split of the stages; confirm.
core_arch = { GF64 = [5, 5] }
min_pbs_nb = 10
batch_pbs_nb = 12
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (64) -- presumably
# the two must stay in sync; confirm.
ct_width = 64
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^10 = 1024) -- confirm
# whether this relation is required.
stg_nb = 10
prime_modulus = "GF64"
psi = 32
delta = 5

[ks_params]
width = 21
lbx = 2
lby = 32
lbz = 3

[pc_params]
ksk_pc = 4
ksk_bytes_w = 32
bsk_pc = 4
bsk_bytes_w = 32
pem_pc = 2
pem_bytes_w = 32
glwe_bytes_w = 32

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64

View File

@@ -0,0 +1,47 @@
# Hpu parameter set: 64-bit ciphertexts with Gaussian noise, GF64 modulus,
# polynomial size 2048.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 804
glwe_dimension = 1
polynomial_size = 2048
lwe_noise_distribution = { GaussianStdDev = 5.963599673924788e-6 }
glwe_noise_distribution = { GaussianStdDev = 2.8452674713391114e-15 }
pbs_base_log = 23
pbs_level = 1
ks_base_log = 2
ks_level = 8
message_width = 2
carry_width = 2
ciphertext_width = 64

[ntt_params]
# NOTE(review): the two GF64 entries sum to stg_nb (5 + 6 = 11) --
# presumably a split of the stages; confirm.
core_arch = { GF64 = [5, 6] }
min_pbs_nb = 6
batch_pbs_nb = 12
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (64) -- presumably
# the two must stay in sync; confirm.
ct_width = 64
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^11 = 2048) -- confirm
# whether this relation is required.
stg_nb = 11
prime_modulus = "GF64"
psi = 32
delta = 5

[ks_params]
width = 21
lbx = 3
lby = 64
lbz = 3

[pc_params]
ksk_pc = 16
ksk_bytes_w = 32
bsk_pc = 8
bsk_bytes_w = 32
pem_pc = 2
pem_bytes_w = 32
glwe_bytes_w = 32

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64

View File

@@ -0,0 +1,48 @@
# Hpu parameter set: 64-bit ciphertexts with Gaussian noise, GF64 modulus,
# polynomial size 2048, with the `opportunistic` flag enabled.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 804
glwe_dimension = 1
polynomial_size = 2048
lwe_noise_distribution = { GaussianStdDev = 5.963599673924788e-6 }
glwe_noise_distribution = { GaussianStdDev = 2.8452674713391114e-15 }
pbs_base_log = 23
pbs_level = 1
ks_base_log = 2
ks_level = 8
message_width = 2
carry_width = 2
ciphertext_width = 64
# NOTE(review): the effect of this flag is not visible from this file --
# document it against the consuming code.
opportunistic = true

[ntt_params]
# NOTE(review): the two GF64 entries sum to stg_nb (5 + 6 = 11) --
# presumably a split of the stages; confirm.
core_arch = { GF64 = [5, 6] }
min_pbs_nb = 11
batch_pbs_nb = 12
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (64) -- presumably
# the two must stay in sync; confirm.
ct_width = 64
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^11 = 2048) -- confirm
# whether this relation is required.
stg_nb = 11
prime_modulus = "GF64"
psi = 64
delta = 5

[ks_params]
width = 21
lbx = 3
lby = 64
lbz = 3

[pc_params]
ksk_pc = 16
ksk_bytes_w = 32
bsk_pc = 8
bsk_bytes_w = 32
pem_pc = 2
pem_bytes_w = 32
glwe_bytes_w = 32

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64

View File

@@ -0,0 +1,44 @@
# Hpu parameter set: 64-bit ciphertexts, GF64 modulus, polynomial size 2048,
# both noise distributions set to TUniformBound = 0.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 20
glwe_dimension = 1
polynomial_size = 2048
lwe_noise_distribution = { TUniformBound = 0 }
glwe_noise_distribution = { TUniformBound = 0 }
pbs_base_log = 23
pbs_level = 1
ks_base_log = 2
ks_level = 7
message_width = 2
carry_width = 2
ciphertext_width = 64

[ntt_params]
# NOTE(review): the two GF64 entries sum to stg_nb (5 + 6 = 11) --
# presumably a split of the stages; confirm.
core_arch = { GF64 = [5, 6] }
min_pbs_nb = 10
batch_pbs_nb = 12
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (64) -- presumably
# the two must stay in sync; confirm.
ct_width = 64
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^11 = 2048) -- confirm
# whether this relation is required.
stg_nb = 11
prime_modulus = "GF64"
psi = 32
delta = 5

[ks_params]
width = 21
lbx = 2
lby = 32
lbz = 3

[pc_params]
ksk_pc = 4
ksk_bytes_w = 32
bsk_pc = 4
bsk_bytes_w = 32
pem_pc = 2
pem_bytes_w = 32
glwe_bytes_w = 32

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64

View File

@@ -0,0 +1,48 @@
# Hpu parameter set: 64-bit ciphertexts with TUniform noise, GF64 modulus,
# polynomial size 2048, with the `opportunistic` flag enabled.

# Cryptographic parameters.
[pbs_params]
lwe_dimension = 839
glwe_dimension = 1
polynomial_size = 2048
lwe_noise_distribution = { TUniformBound = 4 }
glwe_noise_distribution = { TUniformBound = 17 }
pbs_base_log = 23
pbs_level = 1
ks_base_log = 2
ks_level = 7
message_width = 2
carry_width = 2
ciphertext_width = 64
# NOTE(review): the effect of this flag is not visible from this file --
# document it against the consuming code.
opportunistic = true

[ntt_params]
# NOTE(review): the two GF64 entries sum to stg_nb (5 + 6 = 11) --
# presumably a split of the stages; confirm.
core_arch = { GF64 = [5, 6] }
min_pbs_nb = 11
batch_pbs_nb = 12
total_pbs_nb = 32
# NOTE(review): same value as pbs_params.ciphertext_width (64) -- presumably
# the two must stay in sync; confirm.
ct_width = 64
radix = 2
# NOTE(review): equals log2(polynomial_size) here (2^11 = 2048) -- confirm
# whether this relation is required.
stg_nb = 11
prime_modulus = "GF64"
psi = 64
delta = 5

[ks_params]
width = 21
lbx = 3
lby = 64
lbz = 3

[pc_params]
ksk_pc = 16
ksk_bytes_w = 32
bsk_pc = 8
bsk_bytes_w = 32
pem_pc = 2
pem_bytes_w = 32
glwe_bytes_w = 32

[regf_params]
reg_nb = 64
coef_nb = 32

[isc_params]
min_iop_size = 4
depth = 64