Powdr openvm extension with new hints (#3100)

Extend our openvm guest/host with support for new hints.
Includes hints for `k256` affine coordinate inverse and sqrt.
This commit is contained in:
Leandro Pacheco
2025-07-30 12:50:23 -03:00
committed by GitHub
parent 5c8ecd2a46
commit 4be51aa95a
21 changed files with 1571 additions and 65 deletions

View File

@@ -37,6 +37,9 @@ members = [
"autoprecompiles",
"openvm",
"cli-openvm",
"openvm/extensions/hints-guest",
"openvm/extensions/hints-transpiler",
"openvm/extensions/hints-circuit",
]
exclude = ["riscv-runtime"]
@@ -82,6 +85,10 @@ powdr-schemas = { path = "./schemas", version = "0.1.4" }
powdr-autoprecompiles = { path = "./autoprecompiles", version = "0.1.4" }
powdr-openvm = { path = "./openvm", version = "0.1.4" }
powdr-openvm-hints-guest = { path = "./openvm/extensions/hints-guest", version = "0.1.4" }
powdr-openvm-hints-transpiler = { path = "./openvm/extensions/hints-transpiler", version = "0.1.4" }
powdr-openvm-hints-circuit = { path = "./openvm/extensions/hints-circuit", version = "0.1.4" }
# openvm
openvm = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737" }
openvm-build = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737" }
@@ -115,6 +122,8 @@ openvm-pairing-circuit = { git = "https://github.com/powdr-labs/openvm.git", rev
openvm-pairing-transpiler = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737" }
openvm-native-circuit = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737", default-features = false }
openvm-native-recursion = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737", default-features = false }
openvm-platform = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737" }
openvm-custom-insn = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737" }
# stark-backend
openvm-stark-sdk = { git = "https://github.com/powdr-labs/stark-backend.git", rev = "ee4e22b", default-features = false, features = [

View File

@@ -44,6 +44,9 @@ powdr-riscv-elf.workspace = true
powdr-autoprecompiles.workspace = true
powdr-constraint-solver.workspace = true
powdr-openvm-hints-transpiler.workspace = true
powdr-openvm-hints-circuit.workspace = true
eyre = "0.6.12"
serde = "1.0.217"
derive_more = { version = "2.0.1", default-features = false, features = [

View File

@@ -0,0 +1,18 @@
[package]
name = "powdr-openvm-hints-circuit"
version.workspace = true
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
[dependencies]
openvm-circuit = { workspace = true }
openvm-instructions = { workspace = true }
openvm-rv32im-circuit = { workspace = true }
openvm-stark-backend = { workspace = true }
openvm-stark-sdk = { workspace = true }
powdr-openvm-hints-transpiler = { workspace = true }
eyre = "0.6.12"
crypto-bigint = "0.6.1"
elliptic-curve = "0.13.8"

View File

@@ -0,0 +1,218 @@
use openvm_circuit::arch::{PhantomSubExecutor, Streams};
use openvm_circuit::system::memory::MemoryController;
use openvm_instructions::riscv::RV32_MEMORY_AS;
use openvm_instructions::PhantomDiscriminant;
use openvm_rv32im_circuit::adapters::unsafe_read_rv32_register;
use openvm_stark_backend::p3_field::PrimeField32;
use crate::field10x26_k256;
/// Example hint implementation.
///
/// Operand `a` names a register; the register value is used as an address in the
/// RV32 memory address space, four cells are read from there, and the hint stream
/// is replaced by those four cells in reverse order.
pub struct ReverseBytesSubEx;

impl<F: PrimeField32> PhantomSubExecutor<F> for ReverseBytesSubEx {
    fn phantom_execute(
        &mut self,
        memory: &MemoryController<F>,
        streams: &mut Streams<F>,
        _discriminant: PhantomDiscriminant,
        a: F,
        _b: F,
        c_upper: u16,
    ) -> eyre::Result<()> {
        assert_eq!(c_upper, 0);

        // Resolve the register operand to the address it holds.
        let address = unsafe_read_rv32_register(memory, a);

        // Fetch the four cells stored at that address.
        let word = memory.unsafe_read::<4>(
            F::from_canonical_u32(RV32_MEMORY_AS),
            F::from_canonical_u32(address),
        );

        // The hint is the same cells, last one first.
        streams.hint_stream = word.into_iter().rev().collect();
        Ok(())
    }
}
/// Hint executor that inverts a k256 coordinate field element given in SEC1 encoding.
///
/// Takes as input a pointer to 32 bytes, the SEC1 encoding (i.e., big-endian) of a k256 coordinate field element.
/// Sets the hint to be the inverse of the field element in the same encoding (if not zero).
/// Sets the hint to zero when the input is zero.
pub struct K256InverseFieldSubEx;
use crypto_bigint::const_monty_form;
use crypto_bigint::impl_modulus;
use crypto_bigint::modular::ConstMontyParams;
use crypto_bigint::Encoding;
use crypto_bigint::Zero;
use crypto_bigint::U256;
// Declares `K256Mod`, the modulus type passed to `const_monty_form!` below.
// The hex constant is 2^256 - 2^32 - 977 (its low 64 bits are 0xFFFFFFFE_FFFFFC2F).
impl_modulus!(
K256Mod,
U256,
"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F"
);
impl<F: PrimeField32> PhantomSubExecutor<F> for K256InverseFieldSubEx {
    /// Reads 32 big-endian bytes from the address held in register `a`, inverts the
    /// value modulo `K256Mod`, and replaces the hint stream with the 32 big-endian
    /// bytes of the inverse (all zeros when the input reduces to zero).
    ///
    /// Panics if `c_upper` is non-zero or if a memory cell does not fit in a byte.
    fn phantom_execute(
        &mut self,
        memory: &MemoryController<F>,
        streams: &mut Streams<F>,
        _: PhantomDiscriminant,
        a: F,
        _b: F,
        c_upper: u16,
    ) -> eyre::Result<()> {
        assert_eq!(c_upper, 0);

        // The register operand holds the address of the encoded field element.
        let address = unsafe_read_rv32_register(memory, a);
        let cells = memory.unsafe_read::<32>(
            F::from_canonical_u32(RV32_MEMORY_AS),
            F::from_canonical_u32(address),
        );

        // Narrow every memory cell to a byte and fix the length at 32.
        let encoded: Vec<u8> = cells
            .into_iter()
            .map(|f| u8::try_from(f.as_canonical_u32()).expect("value not a byte"))
            .collect();
        let encoded: [u8; 32] = encoded.try_into().unwrap();

        // Move into Montgomery form modulo `K256Mod` (the macro expects an identifier).
        let n = U256::from_be_bytes(encoded);
        let residue = const_monty_form!(n, K256Mod);

        // Zero has no inverse; by convention the hint is zero in that case.
        let inverse = if bool::from(residue.is_zero()) {
            U256::ZERO
        } else {
            residue.inv().unwrap().retrieve()
        };

        streams.hint_stream = inverse
            .to_be_bytes()
            .into_iter()
            .map(F::from_canonical_u8)
            .collect();
        Ok(())
    }
}
/// Size in bytes of the k256 field element in 10x26 representation.
const FIELD10X26_BYTES: usize = 40; // [u32;10]

/// Takes as input a pointer to the inner representation of a k256 coordinate field element (in 32-bit architectures).
/// Sets the hint to be the inverse of the input (if not zero), in the same representation.
/// If the input is zero (normalized or not), the hint is also set, but undefined.
///
/// The representation is ten `u32` limbs, each stored as four little-endian bytes
/// (the RV32 guest's byte order), so the conversion is independent of the host's endianness.
pub struct K256InverseField10x26SubEx;

impl<F: PrimeField32> PhantomSubExecutor<F> for K256InverseField10x26SubEx {
    /// Reads `FIELD10X26_BYTES` bytes from the address held in register `a`, decodes
    /// them as ten little-endian `u32` limbs, inverts the element, and replaces the
    /// hint stream with the little-endian bytes of the result's limbs.
    ///
    /// Panics if `c_upper` is non-zero or if a memory cell does not fit in a byte.
    fn phantom_execute(
        &mut self,
        memory: &MemoryController<F>,
        streams: &mut Streams<F>,
        _: PhantomDiscriminant,
        a: F,
        _b: F,
        c_upper: u16,
    ) -> eyre::Result<()> {
        assert_eq!(c_upper, 0);
        // read register
        let rs1 = unsafe_read_rv32_register(memory, a);
        // read the k256 field_10x26 as raw bytes
        let bytes: [u8; FIELD10X26_BYTES] = memory
            .unsafe_read::<{ FIELD10X26_BYTES }>(
                F::from_canonical_u32(RV32_MEMORY_AS),
                F::from_canonical_u32(rs1),
            )
            .into_iter()
            .map(|f| u8::try_from(f.as_canonical_u32()).expect("value not a byte"))
            .collect::<Vec<_>>()
            .try_into()
            .expect("unsafe_read returned an unexpected number of cells");

        // Decode the limbs explicitly as little-endian words. This replaces a raw
        // native-endian byte copy: no `unsafe`, and correct on any host byte order.
        let mut limbs = [0u32; 10];
        for (limb, chunk) in limbs.iter_mut().zip(bytes.chunks_exact(4)) {
            *limb = u32::from_le_bytes(
                chunk
                    .try_into()
                    .expect("chunks_exact(4) yields 4-byte chunks"),
            );
        }

        let inv = field10x26_k256::FieldElement10x26(limbs).invert();

        // Encode the result the same way: each limb as four little-endian bytes.
        streams.hint_stream = inv
            .0
            .iter()
            .flat_map(|limb| limb.to_le_bytes())
            .map(F::from_canonical_u8)
            .collect();
        Ok(())
    }
}
/// Takes as input a pointer to the inner representation of a k256 coordinate field element (in 32-bit architectures).
/// If the number is a square, sets the hint to a u32 of value one, followed by a square root in the same inner representation.
/// If the number is not a square, sets the hint to a u32 of value zero.
///
/// The u32 flag and every limb are encoded as four little-endian bytes
/// (the RV32 guest's byte order), independent of the host's endianness.
pub struct K256SqrtField10x26SubEx;

impl<F: PrimeField32> PhantomSubExecutor<F> for K256SqrtField10x26SubEx {
    /// Reads `FIELD10X26_BYTES` bytes from the address held in register `a`, decodes
    /// them as ten little-endian `u32` limbs, and attempts a square root.
    ///
    /// Panics if `c_upper` is non-zero or if a memory cell does not fit in a byte.
    fn phantom_execute(
        &mut self,
        memory: &MemoryController<F>,
        streams: &mut Streams<F>,
        _: PhantomDiscriminant,
        a: F,
        _b: F,
        c_upper: u16,
    ) -> eyre::Result<()> {
        assert_eq!(c_upper, 0);
        // read register
        let rs1 = unsafe_read_rv32_register(memory, a);
        // read the k256 field_10x26 as raw bytes
        let bytes: [u8; FIELD10X26_BYTES] = memory
            .unsafe_read::<{ FIELD10X26_BYTES }>(
                F::from_canonical_u32(RV32_MEMORY_AS),
                F::from_canonical_u32(rs1),
            )
            .into_iter()
            .map(|f| u8::try_from(f.as_canonical_u32()).expect("value not a byte"))
            .collect::<Vec<_>>()
            .try_into()
            .expect("unsafe_read returned an unexpected number of cells");

        // Decode the limbs explicitly as little-endian words. This replaces a raw
        // native-endian byte copy: no `unsafe`, and correct on any host byte order.
        let mut limbs = [0u32; 10];
        for (limb, chunk) in limbs.iter_mut().zip(bytes.chunks_exact(4)) {
            *limb = u32::from_le_bytes(
                chunk
                    .try_into()
                    .expect("chunks_exact(4) yields 4-byte chunks"),
            );
        }

        let res = field10x26_k256::FieldElement10x26(limbs).sqrt();
        if res.is_some().into() {
            // return 1 (a square root exists) followed by the result's limbs
            let root = res.unwrap();
            streams.hint_stream = 1u32
                .to_le_bytes()
                .into_iter()
                .chain(root.0.iter().flat_map(|limb| limb.to_le_bytes()))
                .map(F::from_canonical_u8)
                .collect();
        } else {
            // no square root, return a 0
            streams.hint_stream = 0u32
                .to_le_bytes()
                .into_iter()
                .map(F::from_canonical_u8)
                .collect();
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,812 @@
//! The code here has been mostly copied from the `k256` crate.
//! It's the 32-bit implementation of the field element.
use elliptic_curve::consts::U32;
use elliptic_curve::{
subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption},
zeroize::Zeroize,
FieldBytesEncoding,
};
// use crypto_bigint::U256;
use elliptic_curve::bigint::ArrayEncoding;
use elliptic_curve::bigint::U256;
/// Serialized field element for [`Secp256k1`]; its length comes from `Curve::FieldBytesSize` below.
pub type FieldBytes = elliptic_curve::FieldBytes<Secp256k1>;
/// Order of the secp256k1 elliptic curve in hexadecimal.
const ORDER_HEX: &str = "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141";
/// Order of the secp256k1 elliptic curve.
const ORDER: U256 = U256::from_be_hex(ORDER_HEX);
/// Local marker type for the secp256k1 curve, defined here so the `elliptic_curve`
/// traits can be implemented for it in this crate.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, PartialOrd, Ord)]
pub struct Secp256k1;
/// Curve parameters: 32-byte field encoding, 256-bit integers, and the curve order.
impl elliptic_curve::Curve for Secp256k1 {
/// 32-byte serialized field elements.
type FieldBytesSize = U32;
/// 256-bit field modulus.
type Uint = U256;
/// Curve order.
const ORDER: U256 = ORDER;
}
impl FieldBytesEncoding<Secp256k1> for U256 {
fn decode_field_bytes(field_bytes: &FieldBytes) -> Self {
U256::from_be_byte_array(*field_bytes)
}
fn encode_field_bytes(&self) -> FieldBytes {
self.to_be_byte_array()
}
}
impl elliptic_curve::PrimeCurve for Secp256k1 {}
// -----------------------------------------------------------------------------------------------------
/// Field elements modulo the SECP256k1 field modulus (2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1).
/// Uses 10 32-bit limbs (little-endian), where in the normalized form
/// first 9 contain 26 bits of the value each, and the last one contains 22 bits.
/// CurveArithmetic operations can be done without modulo reduction for some time,
/// using the remaining overflow bits.
///
/// The limb array is `pub(crate)` so that other modules of this crate can build and
/// read the raw representation directly.
#[derive(Clone, Copy, Debug)]
pub struct FieldElement10x26(pub(crate) [u32; 10]);
// TODO: maybe instead clean this file up and only keep code that is used?
#[allow(unused)]
impl FieldElement10x26 {
/// Additive identity: every limb is zero.
pub const ZERO: Self = Self([0; 10]);
/// Multiplicative identity: only the least significant limb is set.
pub const ONE: Self = Self([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
/// Parses the given byte array as an SEC1-encoded (big-endian) field element.
/// Does not check the result for being in the correct range.
///
/// The 256 input bits are repacked into ten limbs: `w0` takes the 26 least
/// significant bits (starting from `bytes[31]`), each following limb takes the
/// next 26 bits, and `w9` takes the remaining 22 most significant bits.
/// Limb boundaries fall inside bytes 28, 25, 22, 15, 12, 9 and 2, which is why
/// those bytes are split with a mask in one limb and a shift in the next.
pub(crate) const fn from_bytes_unchecked(bytes: &[u8; 32]) -> Self {
// bits 0..26: bytes 31, 30, 29 and the low 2 bits of byte 28
let w0 = (bytes[31] as u32)
| ((bytes[30] as u32) << 8)
| ((bytes[29] as u32) << 16)
| (((bytes[28] & 0x3) as u32) << 24);
// bits 26..52: high 6 bits of byte 28, bytes 27, 26 and the low 4 bits of byte 25
let w1 = (((bytes[28] >> 2) as u32) & 0x3f)
| ((bytes[27] as u32) << 6)
| ((bytes[26] as u32) << 14)
| (((bytes[25] & 0xf) as u32) << 22);
// bits 52..78: high 4 bits of byte 25, bytes 24, 23 and the low 6 bits of byte 22
let w2 = (((bytes[25] >> 4) as u32) & 0xf)
| ((bytes[24] as u32) << 4)
| ((bytes[23] as u32) << 12)
| (((bytes[22] & 0x3f) as u32) << 20);
// bits 78..104: high 2 bits of byte 22 and bytes 21, 20, 19
let w3 = (((bytes[22] >> 6) as u32) & 0x3)
| ((bytes[21] as u32) << 2)
| ((bytes[20] as u32) << 10)
| ((bytes[19] as u32) << 18);
// bits 104..130: same byte pattern as w0, thirteen bytes further up
let w4 = (bytes[18] as u32)
| ((bytes[17] as u32) << 8)
| ((bytes[16] as u32) << 16)
| (((bytes[15] & 0x3) as u32) << 24);
// bits 130..156: same byte pattern as w1
let w5 = (((bytes[15] >> 2) as u32) & 0x3f)
| ((bytes[14] as u32) << 6)
| ((bytes[13] as u32) << 14)
| (((bytes[12] & 0xf) as u32) << 22);
// bits 156..182: same byte pattern as w2
let w6 = (((bytes[12] >> 4) as u32) & 0xf)
| ((bytes[11] as u32) << 4)
| ((bytes[10] as u32) << 12)
| (((bytes[9] & 0x3f) as u32) << 20);
// bits 182..208: same byte pattern as w3
let w7 = (((bytes[9] >> 6) as u32) & 0x3)
| ((bytes[8] as u32) << 2)
| ((bytes[7] as u32) << 10)
| ((bytes[6] as u32) << 18);
// bits 208..234: same byte pattern as w0
let w8 = (bytes[5] as u32)
| ((bytes[4] as u32) << 8)
| ((bytes[3] as u32) << 16)
| (((bytes[2] & 0x3) as u32) << 24);
// bits 234..256: only 22 bits remain (high 6 bits of byte 2 and bytes 1, 0)
let w9 = (((bytes[2] >> 2) as u32) & 0x3f)
| ((bytes[1] as u32) << 6)
| ((bytes[0] as u32) << 14);
Self([w0, w1, w2, w3, w4, w5, w6, w7, w8, w9])
}
/// Parses an SEC1-encoded (big-endian) field element, rejecting out-of-range values.
///
/// The returned `CtOption` is "none" when the encoded integer is not in `[0, p)`,
/// i.e. when the unchecked parse reports an overflow.
pub fn from_bytes(bytes: &FieldBytes) -> CtOption<Self> {
    let candidate = Self::from_bytes_unchecked(bytes.as_ref());
    let in_range = !candidate.get_overflow();
    CtOption::new(candidate, in_range)
}
/// Builds a field element from a `u64`, spreading it over the three lowest limbs
/// (26 + 26 + 12 bits); the remaining limbs are zero.
pub const fn from_u64(val: u64) -> Self {
    const LIMB_MASK: u32 = 0x3FFFFFF;
    let low = (val as u32) & LIMB_MASK;
    let mid = ((val >> 26) as u32) & LIMB_MASK;
    let high = (val >> 52) as u32;
    Self([low, mid, high, 0, 0, 0, 0, 0, 0, 0])
}
/// Returns the SEC1 encoding of this field element.
///
/// This is the inverse repacking of `from_bytes_unchecked`: `r[0]` receives the
/// most significant bits (from limb 9) and `r[31]` the least significant byte of
/// limb 0. Bytes 2, 9, 12, 15, 22, 25 and 28 straddle a limb boundary and are
/// assembled from the low bits of one limb and the high bits of the next lower one.
/// No normalization is performed here; limb bits above the packed positions are
/// simply truncated by the `as u8` casts and masks.
pub fn to_bytes(self) -> FieldBytes {
let mut r = FieldBytes::default();
// limb 9 (22 bits) -> bytes 0, 1 and the high 6 bits of byte 2
r[0] = (self.0[9] >> 14) as u8;
r[1] = (self.0[9] >> 6) as u8;
r[2] = ((self.0[9] as u8 & 0x3Fu8) << 2) | ((self.0[8] >> 24) as u8 & 0x3);
// limb 8
r[3] = (self.0[8] >> 16) as u8;
r[4] = (self.0[8] >> 8) as u8;
r[5] = self.0[8] as u8;
// limb 7
r[6] = (self.0[7] >> 18) as u8;
r[7] = (self.0[7] >> 10) as u8;
r[8] = (self.0[7] >> 2) as u8;
r[9] = ((self.0[7] as u8 & 0x3u8) << 6) | ((self.0[6] >> 20) as u8 & 0x3fu8);
// limb 6
r[10] = (self.0[6] >> 12) as u8;
r[11] = (self.0[6] >> 4) as u8;
r[12] = ((self.0[6] as u8 & 0xfu8) << 4) | ((self.0[5] >> 22) as u8 & 0xfu8);
// limb 5
r[13] = (self.0[5] >> 14) as u8;
r[14] = (self.0[5] >> 6) as u8;
r[15] = ((self.0[5] as u8 & 0x3fu8) << 2) | ((self.0[4] >> 24) as u8 & 0x3u8);
// limb 4
r[16] = (self.0[4] >> 16) as u8;
r[17] = (self.0[4] >> 8) as u8;
r[18] = self.0[4] as u8;
// limb 3
r[19] = (self.0[3] >> 18) as u8;
r[20] = (self.0[3] >> 10) as u8;
r[21] = (self.0[3] >> 2) as u8;
r[22] = ((self.0[3] as u8 & 0x3u8) << 6) | ((self.0[2] >> 20) as u8 & 0x3fu8);
// limb 2
r[23] = (self.0[2] >> 12) as u8;
r[24] = (self.0[2] >> 4) as u8;
r[25] = ((self.0[2] as u8 & 0xfu8) << 4) | ((self.0[1] >> 22) as u8 & 0xfu8);
// limb 1
r[26] = (self.0[1] >> 14) as u8;
r[27] = (self.0[1] >> 6) as u8;
r[28] = ((self.0[1] as u8 & 0x3fu8) << 2) | ((self.0[0] >> 24) as u8 & 0x3u8);
// limb 0
r[29] = (self.0[0] >> 16) as u8;
r[30] = (self.0[0] >> 8) as u8;
r[31] = self.0[0] as u8;
r
}
/// Adds `x * (2^256 - modulus)` and propagates carries upward.
///
/// The correction `2^256 - modulus` is `0x3D1` in limb 0 plus bit 6 of limb 1
/// (i.e. `2^32`). After the addition, limbs 0..=8 are masked back to 26 bits and
/// any excess is carried into the next limb; limb 9 keeps whatever carry arrives.
fn add_modulus_correction(&self, x: u32) -> Self {
    let mut limbs = self.0;

    // Apply the correction to the two lowest limbs.
    limbs[0] += x * 0x3D1u32;
    limbs[1] += x << 6;

    // Ripple the excess bits from each limb into its successor.
    for i in 0..9 {
        limbs[i + 1] += limbs[i] >> 26;
        limbs[i] &= 0x3FFFFFFu32;
    }

    Self(limbs)
}
/// Strips the bits above position 22 from the last limb and returns them alongside
/// the truncated element.
/// Equivalent to subtracting a multiple of 2^256.
fn subtract_modulus_approximation(&self) -> (Self, u32) {
    let excess = self.0[9] >> 22;

    let mut limbs = self.0;
    limbs[9] &= 0x03FFFFFu32; // equivalent to self -= 2^256 * excess

    (Self(limbs), excess)
}
/// Checks if the field element is greater or equal to the modulus.
///
/// That is the case when the top limb has bits above position 22, or when limbs
/// 2..=9 are all saturated and adding the modulus correction to limbs 0 and 1
/// would carry out of limb 1. All sub-conditions are always evaluated
/// (non-short-circuit operators).
fn get_overflow(&self) -> Choice {
    let limbs = &self.0;

    let middle = limbs[2..9].iter().fold(u32::MAX, |acc, &limb| acc & limb);

    let has_high_bits = limbs[9] >> 22 != 0;
    let top_saturated = limbs[9] == 0x3FFFFFu32;
    let middle_saturated = middle == 0x3FFFFFFu32;
    let low_carries = (limbs[1] + 0x40u32 + ((limbs[0] + 0x3D1u32) >> 26)) > 0x3FFFFFFu32;

    let overflow = has_high_bits | (top_saturated & middle_saturated & low_carries);
    Choice::from(overflow as u8)
}
/// Brings the field element's magnitude to 1, but does not necessarily normalize it.
pub fn normalize_weak(&self) -> Self {
    // Peel off the excess of the top limb first, so the pass below produces at
    // most a single carry.
    let (truncated, excess) = self.subtract_modulus_approximation();

    // Fold the excess back in; this leaves magnitude 1 ...
    let reduced = truncated.add_modulus_correction(excess);

    // ... except for a possible carry at bit 22 of the top limb (bit 256 overall).
    debug_assert!(reduced.0[9] >> 23 == 0);
    reduced
}
/// Fully normalizes the field element.
/// That is, first nine limbs are at most 26 bit large, the last limb is at most 22 bit large,
/// and the value is less than the modulus.
pub fn normalize(&self) -> Self {
    let weak = self.normalize_weak();

    // One final reduction is needed exactly when the value is still >= the modulus.
    let needs_reduction = weak.get_overflow();

    // Always compute the reduced candidate (constant-time), then drop the
    // multiple of 2^256 the reduction may have pushed into the top limb.
    let (reduced, carry) = weak
        .add_modulus_correction(1u32)
        .subtract_modulus_approximation();

    // A carry out of the top limb must coincide with the overflow flag.
    debug_assert!(carry == (needs_reduction.unwrap_u8() as u32));

    Self::conditional_select(&weak, &reduced, needs_reduction)
}
/// Checks if the field element becomes zero if normalized.
///
/// After a weak normalization the raw limbs can represent zero in two ways:
/// all limbs zero, or the limbs of the modulus itself.
pub fn normalizes_to_zero(&self) -> Choice {
    let t = self.normalize_weak().0;

    // `all_bits` is zero only for a raw value of 0.
    let all_bits = t.iter().fold(0u32, |acc, &limb| acc | limb);

    // `modulus_match` is all ones (26 bits) only for a raw value equal to the modulus.
    let middle = t[2..9].iter().fold(u32::MAX, |acc, &limb| acc & limb);
    let modulus_match =
        (t[0] ^ 0x3D0u32) & (t[1] ^ 0x40u32) & middle & (t[9] ^ 0x3C00000u32);

    Choice::from(((all_bits == 0) | (modulus_match == 0x3FFFFFFu32)) as u8)
}
/// Determine if this `FieldElement10x26` is zero, looking at the raw limbs only
/// (no normalization is performed).
///
/// # Returns
///
/// If zero, return `Choice(1)`. Otherwise, return `Choice(0)`.
pub fn is_zero(&self) -> Choice {
    let any_bits = self.0.iter().fold(0u32, |acc, &limb| acc | limb);
    Choice::from((any_bits == 0) as u8)
}
/// Determine if this `FieldElement10x26` is odd in the SEC1 sense: `self mod 2 == 1`.
///
/// # Returns
///
/// If odd, return `Choice(1)`. Otherwise, return `Choice(0)`.
pub fn is_odd(&self) -> Choice {
(self.0[0] as u8 & 1).into()
}
/// The maximum number `m` for which `0x3FFFFFF * 2 * (m + 1) < 2^32`.
pub const fn max_magnitude() -> u32 {
    31
}
/// Returns -self, treating it as a value of given magnitude.
/// The provided magnitude must be equal or greater than the actual magnitude of `self`.
///
/// Each limb is computed as `modulus_limb * 2 * (magnitude + 1) - limb`.
pub const fn negate(&self, magnitude: u32) -> Self {
    // Limbs of the field modulus in the 10x26 layout, least significant first.
    const MODULUS_LIMBS: [u32; 10] = [
        0x3FFFC2F, 0x3FFFFBF, 0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF,
        0x3FFFFFF, 0x3FFFFFF, 0x03FFFFF,
    ];

    let m: u32 = magnitude + 1;
    let mut negated = [0u32; 10];
    // `while` instead of an iterator: this is a `const fn`.
    let mut i = 0;
    while i < 10 {
        negated[i] = MODULUS_LIMBS[i] * 2 * m - self.0[i];
        i += 1;
    }
    Self(negated)
}
/// Returns self + rhs mod p.
/// Sums the magnitudes: limbs are added pairwise without any reduction.
pub const fn add(&self, rhs: &Self) -> Self {
    let mut sum = [0u32; 10];
    // `while` instead of an iterator: this is a `const fn`.
    let mut i = 0;
    while i < 10 {
        sum[i] = self.0[i] + rhs.0[i];
        i += 1;
    }
    Self(sum)
}
/// Multiplies by a single-limb integer.
/// Multiplies the magnitude by the same value: every limb is scaled without reduction.
pub const fn mul_single(&self, rhs: u32) -> Self {
    let mut scaled = [0u32; 10];
    // `while` instead of an iterator: this is a `const fn`.
    let mut i = 0;
    while i < 10 {
        scaled[i] = self.0[i] * rhs;
        i += 1;
    }
    Self(scaled)
}
/// Shared body of `mul()` and `square()`: multiplies `self` by `rhs` and reduces the
/// product back into ten limbs.
///
/// Every limb is widened to `u64`. `c` accumulates the low product columns
/// (`p0`..`p8`) and `d` the high ones (`p9`..`p18`); each 26-bit chunk taken from
/// `d` (`u0`..`u8`) is folded into the low columns through the multipliers `rr0`
/// and `rr1` (see the `[x 0 0 0 0 0 0 0 0 0 0] = [x*rr1 x*rr0]` note in the body).
/// The bracketed comments track, step by step, which limb vector the current state
/// is congruent to, and the `debug_assert!`s record the bit bounds expected at
/// each step.
///
/// Per the final assertions, `r0`, `r1` and `r3`..`r8` fit in 26 bits, `r9` fits in
/// 22 bits, and `r2` may use up to 27 bits.
#[inline(always)]
fn mul_inner(&self, rhs: &Self) -> Self {
/*
`square()` is just `mul()` with equal arguments. Rust compiler is smart enough
to do all the necessary optimizations for this case, but it needs to have this information
inside a function. If a function is just *called* with the same arguments,
this information cannot be used, so the function must be inlined while using the same arguments.
Now `mul()` is quite long and therefore expensive to inline. So we have an inner (inlined)
function, that is used inside `mul()` and `square()`, and when it is used with the same
arguments in `square()`, compiler is able to use that fact after inlining.
*/
// 26-bit limb mask and the two reduction multipliers used when folding high chunks down.
let m = 0x3FFFFFFu64;
let rr0 = 0x3D10u64;
let rr1 = 0x400u64;
let a0 = self.0[0] as u64;
let a1 = self.0[1] as u64;
let a2 = self.0[2] as u64;
let a3 = self.0[3] as u64;
let a4 = self.0[4] as u64;
let a5 = self.0[5] as u64;
let a6 = self.0[6] as u64;
let a7 = self.0[7] as u64;
let a8 = self.0[8] as u64;
let a9 = self.0[9] as u64;
let b0 = rhs.0[0] as u64;
let b1 = rhs.0[1] as u64;
let b2 = rhs.0[2] as u64;
let b3 = rhs.0[3] as u64;
let b4 = rhs.0[4] as u64;
let b5 = rhs.0[5] as u64;
let b6 = rhs.0[6] as u64;
let b7 = rhs.0[7] as u64;
let b8 = rhs.0[8] as u64;
let b9 = rhs.0[9] as u64;
// [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
// for 0 <= x <= 9, px is a shorthand for sum(a[i]*b[x-i], i=0..x).
// for 9 <= x <= 18, px is a shorthand for sum(a[i]*b[x-i], i=(x-9)..9)
// Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*rr1 x*rr0].
let mut c: u64;
let mut d: u64;
// Start with column p9; its low 26 bits (t9) are only merged into the result near the end.
d = a0 * b9
+ a1 * b8
+ a2 * b7
+ a3 * b6
+ a4 * b5
+ a5 * b4
+ a6 * b3
+ a7 * b2
+ a8 * b1
+ a9 * b0;
// [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0]
let t9 = (d & m) as u32;
d >>= 26;
debug_assert!(t9 >> 26 == 0);
debug_assert!(d >> 38 == 0);
// [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0]
c = a0 * b0;
debug_assert!(c >> 60 == 0);
// [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0]
d +=
a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1;
debug_assert!(d >> 63 == 0);
// [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0]
let u0 = (d & m) as u32;
d >>= 26;
c += u0 as u64 * rr0;
debug_assert!(u0 >> 26 == 0);
debug_assert!(d >> 37 == 0);
debug_assert!(c >> 61 == 0);
// [d u0 t9 0 0 0 0 0 0 0 0 c-u0*rr0] = [p10 p9 0 0 0 0 0 0 0 0 p0]
let t0 = (c & m) as u32;
c >>= 26;
c += u0 as u64 * rr1;
debug_assert!(t0 >> 26 == 0);
debug_assert!(c >> 37 == 0);
// [d u0 t9 0 0 0 0 0 0 0 c-u0*rr1 t0-u0*rr0] = [p10 p9 0 0 0 0 0 0 0 0 p0]
// [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0]
c += a0 * b1 + a1 * b0;
debug_assert!(c >> 62 == 0);
// [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0]
d += a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2;
debug_assert!(d >> 63 == 0);
// [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0]
let u1 = (d & m) as u32;
d >>= 26;
c += u1 as u64 * rr0;
debug_assert!(u1 >> 26 == 0);
debug_assert!(d >> 37 == 0);
debug_assert!(c >> 63 == 0);
// [d u1 0 t9 0 0 0 0 0 0 0 c-u1*rr0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0]
let t1 = (c & m) as u32;
c >>= 26;
c += u1 as u64 * rr1;
debug_assert!(t1 >> 26 == 0);
debug_assert!(c >> 38 == 0);
// [d u1 0 t9 0 0 0 0 0 0 c-u1*rr1 t1-u1*rr0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0]
// [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0]
c += a0 * b2 + a1 * b1 + a2 * b0;
debug_assert!(c >> 62 == 0);
// [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0]
d += a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + a8 * b4 + a9 * b3;
debug_assert!(d >> 63 == 0);
// [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0]
let u2 = (d & m) as u32;
d >>= 26;
c += u2 as u64 * rr0;
debug_assert!(u2 >> 26 == 0);
debug_assert!(d >> 37 == 0);
debug_assert!(c >> 63 == 0);
// [d u2 0 0 t9 0 0 0 0 0 0 c-u2*rr0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0]
let t2 = (c & m) as u32;
c >>= 26;
c += u2 as u64 * rr1;
debug_assert!(t2 >> 26 == 0);
debug_assert!(c >> 38 == 0);
// [d u2 0 0 t9 0 0 0 0 0 c-u2*rr1 t2-u2*rr0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0]
// [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0]
c += a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
debug_assert!(c >> 63 == 0);
// [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0]
d += a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + a9 * b4;
debug_assert!(d >> 63 == 0);
// [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0]
let u3 = (d & m) as u32;
d >>= 26;
c += u3 as u64 * rr0;
debug_assert!(u3 >> 26 == 0);
debug_assert!(d >> 37 == 0);
// [d u3 0 0 0 t9 0 0 0 0 0 c-u3*rr0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0]
let t3 = (c & m) as u32;
c >>= 26;
c += u3 as u64 * rr1;
debug_assert!(t3 >> 26 == 0);
debug_assert!(c >> 39 == 0);
// [d u3 0 0 0 t9 0 0 0 0 c-u3*rr1 t3-u3*rr0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0]
// [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0]
c += a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
debug_assert!(c >> 63 == 0);
// [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0]
d += a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5;
debug_assert!(d >> 62 == 0);
// [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0]
let u4 = (d & m) as u32;
d >>= 26;
c += u4 as u64 * rr0;
debug_assert!(u4 >> 26 == 0);
debug_assert!(d >> 36 == 0);
// [d u4 0 0 0 0 t9 0 0 0 0 c-u4*rr0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0]
let t4 = (c & m) as u32;
c >>= 26;
c += u4 as u64 * rr1;
debug_assert!(t4 >> 26 == 0);
debug_assert!(c >> 39 == 0);
// [d u4 0 0 0 0 t9 0 0 0 c-u4*rr1 t4-u4*rr0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0]
// [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0]
c += a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
debug_assert!(c >> 63 == 0);
// [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0]
d += a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6;
debug_assert!(d >> 62 == 0);
// [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0]
let u5 = (d & m) as u32;
d >>= 26;
c += u5 as u64 * rr0;
debug_assert!(u5 >> 26 == 0);
debug_assert!(d >> 36 == 0);
// [d u5 0 0 0 0 0 t9 0 0 0 c-u5*rr0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0]
let t5 = (c & m) as u32;
c >>= 26;
c += u5 as u64 * rr1;
debug_assert!(t5 >> 26 == 0);
debug_assert!(c >> 39 == 0);
// [d u5 0 0 0 0 0 t9 0 0 c-u5*rr1 t5-u5*rr0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0]
// [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0]
c += a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
debug_assert!(c >> 63 == 0);
// [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0]
d += a7 * b9 + a8 * b8 + a9 * b7;
debug_assert!(d >> 61 == 0);
// [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0]
let u6 = (d & m) as u32;
d >>= 26;
c += u6 as u64 * rr0;
debug_assert!(u6 >> 26 == 0);
debug_assert!(d >> 35 == 0);
// [d u6 0 0 0 0 0 0 t9 0 0 c-u6*rr0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0]
let t6 = (c & m) as u32;
c >>= 26;
c += u6 as u64 * rr1;
debug_assert!(t6 >> 26 == 0);
debug_assert!(c >> 39 == 0);
// [d u6 0 0 0 0 0 0 t9 0 c-u6*rr1 t6-u6*rr0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0]
// [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0]
c += a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0;
debug_assert!(c <= 0x8000007C00000007u64);
// [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0]
d += a8 * b9 + a9 * b8;
debug_assert!(d >> 58 == 0);
// [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0]
let u7 = (d & m) as u32;
d >>= 26;
c += u7 as u64 * rr0;
debug_assert!(u7 >> 26 == 0);
debug_assert!(d >> 32 == 0);
// From here on `d` is asserted to fit in 32 bits, so it is carried forward as a u32.
let d32 = d as u32;
debug_assert!(c <= 0x800001703FFFC2F7u64);
// [d u7 0 0 0 0 0 0 0 t9 0 c-u7*rr0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0]
let t7 = (c & m) as u32;
c >>= 26;
c += u7 as u64 * rr1;
debug_assert!(t7 >> 26 == 0);
debug_assert!(c >> 38 == 0);
// [d u7 0 0 0 0 0 0 0 t9 c-u7*rr1 t7-u7*rr0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0]
// [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0]
c +=
a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1 + a8 * b0;
debug_assert!(c <= 0x9000007B80000008u64);
// [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
d = d32 as u64 + a9 * b9;
debug_assert!(d >> 57 == 0);
// [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let u8 = (d & m) as u32;
d >>= 26;
c += u8 as u64 * rr0;
debug_assert!(u8 >> 26 == 0);
debug_assert!(d >> 31 == 0);
let d32 = d as u32;
debug_assert!(c <= 0x9000016FBFFFC2F8u64);
// [d u8 0 0 0 0 0 0 0 0 t9 c-u8*rr0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// Limbs 3..=7 are already final; they are only renamed from t* to r*.
let r3 = t3;
debug_assert!(r3 >> 26 == 0);
// [d u8 0 0 0 0 0 0 0 0 t9 c-u8*rr0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let r4 = t4;
debug_assert!(r4 >> 26 == 0);
// [d u8 0 0 0 0 0 0 0 0 t9 c-u8*rr0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let r5 = t5;
debug_assert!(r5 >> 26 == 0);
// [d u8 0 0 0 0 0 0 0 0 t9 c-u8*rr0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let r6 = t6;
debug_assert!(r6 >> 26 == 0);
// [d u8 0 0 0 0 0 0 0 0 t9 c-u8*rr0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let r7 = t7;
debug_assert!(r7 >> 26 == 0);
// [d u8 0 0 0 0 0 0 0 0 t9 c-u8*rr0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let r8 = (c & m) as u32;
c >>= 26;
c += u8 as u64 * rr1;
debug_assert!(r8 >> 26 == 0);
debug_assert!(c >> 39 == 0);
// [d u8 0 0 0 0 0 0 0 0 t9+c-u8*rr1 r8-u8*rr0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
c += d32 as u64 * rr0 + t9 as u64;
debug_assert!(c >> 45 == 0);
// [d 0 0 0 0 0 0 0 0 0 c-d*rr0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// The top limb keeps only 22 bits, hence the narrower mask (m >> 4) and the shift by 22.
let r9 = (c & (m >> 4)) as u32;
c >>= 22;
c += d * (rr1 << 4);
debug_assert!(r9 >> 22 == 0);
debug_assert!(c >> 46 == 0);
// [d 0 0 0 0 0 0 0 0 r9+((c-d*rr1<<4)<<22)-d*rr0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// [d 0 0 0 0 0 0 0 -d*rr1 r9+(c<<22)-d*rr0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// Fold what overflowed the top limb back into the three lowest limbs.
d = c * (rr0 >> 4) + t0 as u64;
debug_assert!(d >> 56 == 0);
// [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*rr0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let r0 = (d & m) as u32;
d >>= 26;
debug_assert!(r0 >> 26 == 0);
debug_assert!(d >> 30 == 0);
let d32 = d as u32;
// [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*rr0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
d = d32 as u64 + c * (rr1 >> 4) + t1 as u64;
debug_assert!(d >> 53 == 0);
debug_assert!(d <= 0x10000003FFFFBFu64);
// [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*rr1>>4 r0-c*rr0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
let r1 = (d & m) as u32;
d >>= 26;
debug_assert!(r1 >> 26 == 0);
debug_assert!(d >> 27 == 0);
let d32 = d as u32;
debug_assert!(d <= 0x4000000u64);
// [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
d = d32 as u64 + t2 as u64;
debug_assert!(d >> 27 == 0);
// [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
// r2 is not masked: the final carry is left in place, so it may use 27 bits.
let r2 = d as u32;
debug_assert!(r2 >> 27 == 0);
// [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0]
Self([r0, r1, r2, r3, r4, r5, r6, r7, r8, r9])
}
/// Returns self * rhs mod p
/// Brings the magnitude to 1 (but doesn't normalize the result).
/// The magnitudes of arguments should be <= 8.
///
/// Thin public wrapper: all of the work is done by `mul_inner`.
pub fn mul(&self, rhs: &Self) -> Self {
self.mul_inner(rhs)
}
/// Returns self * self mod p
/// Brings the magnitude to 1 (but doesn't normalize the result).
/// The magnitude of self should be <= 8.
///
/// Implemented as a general multiplication of `self` with itself via `mul_inner`;
/// no dedicated squaring routine is used here.
pub fn square(&self) -> Self {
self.mul_inner(self)
}
/// Squares `self` `k` times in a row, i.e. computes self^(2^k).
/// With `k == 0` a copy of `self` is returned unchanged.
pub fn pow2k(&self, k: u32) -> Self {
    (0..k).fold(*self, |acc, _| acc.square())
}
/// Returns the multiplicative inverse of self, if self is non-zero.
/// The result has magnitude 1, but is not normalized.
///
/// The inverse is computed by exponentiation with a fixed addition chain. Below, `xN` denotes
/// self^(2^N - 1), i.e. an exponent made of N consecutive one-bits.
/// The final exponent is 223 ones, a zero, 22 ones, `00001`, `011`, `01` (most significant
/// bit first), which is p - 2 for the secp256k1 base field prime.
/// NOTE(review): no zero check is done here; for a zero input the chain presumably just
/// yields zero, so callers must handle that case themselves.
pub fn invert(&self) -> Self {
// Build the blocks of ones: 2, 3, 6, 9, 11, 22, 44, 88, 176, 220, 223.
let x2 = self.pow2k(1).mul(self);
let x3 = x2.pow2k(1).mul(self);
let x6 = x3.pow2k(3).mul(&x3);
let x9 = x6.pow2k(3).mul(&x3);
let x11 = x9.pow2k(2).mul(&x2);
let x22 = x11.pow2k(11).mul(&x11);
let x44 = x22.pow2k(22).mul(&x22);
let x88 = x44.pow2k(44).mul(&x44);
let x176 = x88.pow2k(88).mul(&x88);
let x220 = x176.pow2k(44).mul(&x44);
let x223 = x220.pow2k(3).mul(&x3);
// The final result is then assembled using a sliding window over the blocks.
// Each `pow2k(n)` shifts the exponent left by n bits, each `mul` fills in the low bits.
x223.pow2k(23)
.mul(&x22)
.pow2k(5)
.mul(self)
.pow2k(3)
.mul(&x2)
.pow2k(2)
.mul(self)
}
/// Returns the square root of self mod p, or `None` if no square root exists.
/// The result has magnitude 1, but is not normalized.
///
/// The candidate root is self raised to a fixed exponent, built with the same addition chain
/// as `invert` (`xN` denotes self^(2^N - 1)). The exponent is 223 ones, a zero, 22 ones,
/// `000011`, `00` (most significant bit first), which is (p + 1) / 4 for the secp256k1 base
/// field prime. The candidate is then verified by squaring it and comparing with `self`.
pub fn sqrt(&self) -> CtOption<Self> {
// Build the blocks of ones: 2, 3, 6, 9, 11, 22, 44, 88, 176, 220, 223.
let x2 = self.pow2k(1).mul(self);
let x3 = x2.pow2k(1).mul(self);
let x6 = x3.pow2k(3).mul(&x3);
let x9 = x6.pow2k(3).mul(&x3);
let x11 = x9.pow2k(2).mul(&x2);
let x22 = x11.pow2k(11).mul(&x11);
let x44 = x22.pow2k(22).mul(&x22);
let x88 = x44.pow2k(44).mul(&x44);
let x176 = x88.pow2k(88).mul(&x88);
let x220 = x176.pow2k(44).mul(&x44);
let x223 = x220.pow2k(3).mul(&x3);
// The final result is then assembled using a sliding window over the blocks.
let res = x223.pow2k(23).mul(&x22).pow2k(6).mul(&x2).pow2k(2);
// `self - res^2` normalizes to zero exactly when `res` squares back to `self`.
let is_root = (res.mul(&res).negate(1).add(self)).normalizes_to_zero();
// Only return Some if it's the square root.
CtOption::new(res, is_root)
}
}
/// The default field element is `Self::ZERO`.
impl Default for FieldElement10x26 {
fn default() -> Self {
Self::ZERO
}
}
impl ConditionallySelectable for FieldElement10x26 {
    /// Selects between `a` and `b` limb by limb, delegating every limb to
    /// `u32::conditional_select` with the same `choice`.
    #[inline(always)]
    fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
        Self(core::array::from_fn(|limb| {
            u32::conditional_select(&a.0[limb], &b.0[limb], choice)
        }))
    }
}
impl ConstantTimeEq for FieldElement10x26 {
fn ct_eq(&self, other: &Self) -> Choice {
self.0[0].ct_eq(&other.0[0])
& self.0[1].ct_eq(&other.0[1])
& self.0[2].ct_eq(&other.0[2])
& self.0[3].ct_eq(&other.0[3])
& self.0[4].ct_eq(&other.0[4])
& self.0[5].ct_eq(&other.0[5])
& self.0[6].ct_eq(&other.0[6])
& self.0[7].ct_eq(&other.0[7])
& self.0[8].ct_eq(&other.0[8])
& self.0[9].ct_eq(&other.0[9])
}
}
impl Zeroize for FieldElement10x26 {
/// Zeroizes the inner limb storage (`self.0`) through its own `Zeroize` implementation.
fn zeroize(&mut self) {
self.0.zeroize();
}
}

View File

@@ -0,0 +1,56 @@
use openvm_circuit::arch::{VmExtension, VmInventory};
use openvm_circuit::circuit_derive::{Chip, ChipUsageGetter};
use openvm_circuit::derive::{AnyEnum, InstructionExecutor};
use openvm_circuit::system::phantom::PhantomChip;
use openvm_instructions::PhantomDiscriminant;
use openvm_stark_backend::p3_field::PrimeField32;
use powdr_openvm_hints_transpiler::HintsPhantom;
// NOTE: the `field10x26_k256` module is mostly copy/pasted code from k256 for the field element representation in 32-bit architectures
mod executors;
mod field10x26_k256;
/// OpenVM extension with miscellaneous hint implementations.
///
/// Its `VmExtension::build` only registers phantom sub-executors (one per `HintsPhantom`
/// discriminant); the inventory it returns is left empty.
pub struct HintsExtension;
/// Executor enum used as `VmExtension::Executor` for `HintsExtension`.
/// NOTE(review): `HintsExtension::build` never adds an executor to its inventory, so this
/// single variant presumably exists only to give the derives something to dispatch on — confirm.
#[derive(ChipUsageGetter, Chip, InstructionExecutor, AnyEnum)]
pub enum HintsExecutor<F: PrimeField32> {
Phantom(PhantomChip<F>),
}
/// Periphery enum used as `VmExtension::Periphery` for `HintsExtension`.
/// NOTE(review): `HintsExtension::build` never adds a periphery chip to its inventory, so this
/// single variant presumably exists only to give the derives something to dispatch on — confirm.
#[derive(ChipUsageGetter, Chip, AnyEnum)]
pub enum HintsPeriphery<F: PrimeField32> {
Phantom(PhantomChip<F>),
}
impl<F: PrimeField32> VmExtension<F> for HintsExtension {
type Executor = HintsExecutor<F>;
type Periphery = HintsPeriphery<F>;
fn build(
&self,
builder: &mut openvm_circuit::arch::VmInventoryBuilder<F>,
) -> Result<
openvm_circuit::arch::VmInventory<Self::Executor, Self::Periphery>,
openvm_circuit::arch::VmInventoryError,
> {
let inventory = VmInventory::new();
builder.add_phantom_sub_executor(
executors::ReverseBytesSubEx,
PhantomDiscriminant(HintsPhantom::HintReverseBytes as u16),
)?;
builder.add_phantom_sub_executor(
executors::K256InverseFieldSubEx,
PhantomDiscriminant(HintsPhantom::HintK256InverseField as u16),
)?;
builder.add_phantom_sub_executor(
executors::K256InverseField10x26SubEx,
PhantomDiscriminant(HintsPhantom::HintK256InverseField10x26 as u16),
)?;
builder.add_phantom_sub_executor(
executors::K256SqrtField10x26SubEx,
PhantomDiscriminant(HintsPhantom::HintK256SqrtField10x26 as u16),
)?;
Ok(inventory)
}
}

View File

@@ -0,0 +1,15 @@
[package]
name = "powdr-openvm-hints-guest"
version.workspace = true
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
[target.'cfg(target_os = "zkvm")'.dependencies]
openvm-platform = { workspace = true, features = ["rust-runtime"] }
openvm-rv32im-guest.workspace = true
openvm-custom-insn.workspace = true
[dependencies]
strum_macros = "0.27"

View File

@@ -0,0 +1,134 @@
#![no_std]
#[cfg(target_os = "zkvm")]
use openvm_custom_insn; // needed for the hint_store_u32 macro
use strum_macros::FromRepr;
/// This is custom-2 defined in RISC-V spec document
pub const OPCODE: u8 = 0x5b;
pub const HINTS_FUNCT3: u8 = 0b000;
/// `funct7` values of the custom hint instruction; each one selects a different hint.
/// The numeric values are what the guest encodes into the instruction, so they are spelled
/// out explicitly.
#[derive(Debug, Copy, Clone, PartialEq, Eq, FromRepr)]
#[repr(u8)]
pub enum HintsFunct7 {
    ReverseBytes = 0,
    K256InverseField = 1,
    K256InverseField10x26 = 2,
    K256SqrtField10x26 = 3,
}
/// Emits the custom R-type instruction (`OPCODE`, `HINTS_FUNCT3`, funct7 = `ReverseBytes`)
/// with the pointer `bytes` passed in `rs1`; `rd` and `rs2` are fixed to `x0`.
/// Only compiled for the zkVM target. The instruction itself returns nothing; the caller
/// reads the hint's output afterwards.
#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_reverse_bytes(bytes: *const u8) {
openvm_platform::custom_insn_r!(
opcode = OPCODE,
funct3 = HINTS_FUNCT3,
funct7 = HintsFunct7::ReverseBytes as u8,
rd = Const "x0",
rs1 = In bytes,
rs2 = Const "x0"
);
}
/// Emits the custom R-type instruction (`OPCODE`, `HINTS_FUNCT3`, funct7 = `K256InverseField`)
/// with the pointer `bytes` passed in `rs1`; `rd` and `rs2` are fixed to `x0`.
/// Only compiled for the zkVM target. The instruction itself returns nothing; the caller
/// reads the hint's output afterwards.
#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_k256_inverse_field(bytes: *const u8) {
openvm_platform::custom_insn_r!(
opcode = OPCODE,
funct3 = HINTS_FUNCT3,
funct7 = HintsFunct7::K256InverseField as u8,
rd = Const "x0",
rs1 = In bytes,
rs2 = Const "x0"
);
}
/// Emits the custom R-type instruction (`OPCODE`, `HINTS_FUNCT3`,
/// funct7 = `K256InverseField10x26`) with the pointer `bytes` passed in `rs1`; `rd` and `rs2`
/// are fixed to `x0`.
/// Only compiled for the zkVM target. The instruction itself returns nothing; the caller
/// reads the hint's output afterwards.
#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_k256_inverse_field_10x26(bytes: *const u8) {
openvm_platform::custom_insn_r!(
opcode = OPCODE,
funct3 = HINTS_FUNCT3,
funct7 = HintsFunct7::K256InverseField10x26 as u8,
rd = Const "x0",
rs1 = In bytes,
rs2 = Const "x0",
);
}
/// Emits the custom R-type instruction (`OPCODE`, `HINTS_FUNCT3`,
/// funct7 = `K256SqrtField10x26`) with the pointer `bytes` passed in `rs1`; `rd` and `rs2`
/// are fixed to `x0`.
/// Only compiled for the zkVM target. The instruction itself returns nothing; the caller
/// reads the hint's output afterwards.
#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_k256_sqrt_field_10x26(bytes: *const u8) {
openvm_platform::custom_insn_r!(
opcode = OPCODE,
funct3 = HINTS_FUNCT3,
funct7 = HintsFunct7::K256SqrtField10x26 as u8,
rd = Const "x0",
rs1 = In bytes,
rs2 = Const "x0",
);
}
/// Example hint: returns `val` with its four bytes in reverse order.
///
/// On the zkVM target the result is requested from the host through the custom hint
/// instruction and then read back into a local; on every other target it is computed
/// directly.
pub fn hint_reverse_bytes(val: u32) -> u32 {
    #[cfg(target_os = "zkvm")]
    {
        let result = core::mem::MaybeUninit::<u32>::uninit();
        insn_reverse_bytes(&val as *const u32 as *const u8);
        unsafe {
            openvm_rv32im_guest::hint_store_u32!(result.as_ptr() as *const u32);
            result.assume_init()
        }
    }
    #[cfg(not(target_os = "zkvm"))]
    {
        val.swap_bytes()
    }
}
/// Inverse of field element in SECP256k1 modulus (if not zero).
/// The caller is responsible for handling the zero input case, and the returned value is zero in that case.
///
/// `sec1_bytes` is handed to the host by pointer only; its length is not checked here.
/// NOTE(review): the host-side executor presumably reads 32 bytes from that pointer —
/// confirm, and make sure callers never pass a shorter slice.
#[cfg(target_os = "zkvm")]
pub fn hint_k256_inverse_field(sec1_bytes: &[u8]) -> [u8; 32] {
// Ask the host to compute the hint for the element stored at `sec1_bytes`.
insn_k256_inverse_field(sec1_bytes.as_ptr() as *const u8);
let inverse = core::mem::MaybeUninit::<[u8; 32]>::uninit();
unsafe {
// 8 words of 4 bytes each cover the 32 bytes of `inverse`.
// SAFETY (assumed): `hint_buffer_u32!` fills all 8 words before `assume_init` is
// called — TODO confirm against the macro's documentation.
openvm_rv32im_guest::hint_buffer_u32!(inverse.as_ptr() as *const u8, 8);
inverse.assume_init()
}
}
/// Inverse of field element in SECP256k1 modulus (if not zero).
/// Takes in the raw 32-bit architecture representation of the field element from k256 (`FieldElement10x26`).
/// The caller is responsible for handling the zero input case, and the returned value is undefined in that case.
#[cfg(target_os = "zkvm")]
pub fn hint_k256_inverse_field_10x26(elem: [u32; 10]) -> [u32; 10] {
// Ask the host to compute the hint for the ten limbs stored in `elem`.
insn_k256_inverse_field_10x26(elem.as_ptr() as *const u8);
let inverse = core::mem::MaybeUninit::<[u32; 10]>::uninit();
unsafe {
// One word per limb: 10 words cover all of `inverse`.
// SAFETY (assumed): `hint_buffer_u32!` fills all 10 words before `assume_init` is
// called — TODO confirm against the macro's documentation.
openvm_rv32im_guest::hint_buffer_u32!(inverse.as_ptr() as *const u8, 10);
inverse.assume_init()
}
}
/// Square root of a field element in SECP256k1 modulus (if exists).
/// Takes and returns the raw ten-limb `[u32; 10]` representation.
/// Returns `None` when the host reports that no square root exists.
///
/// The hint output is read in two steps: first a single word acting as a boolean flag,
/// then (only if the flag is non-zero) the ten limbs of the root.
#[cfg(target_os = "zkvm")]
pub fn hint_k256_sqrt_field_10x26(elem: [u32; 10]) -> Option<[u32; 10]> {
// Ask the host to compute the hint for the ten limbs stored in `elem`.
insn_k256_sqrt_field_10x26(elem.as_ptr() as *const u8);
// read "boolean" result of whether the square root exists
let has_sqrt = core::mem::MaybeUninit::<u32>::uninit();
unsafe {
// SAFETY (assumed): `hint_store_u32!` writes the word before `assume_init` is
// called — TODO confirm against the macro's documentation.
openvm_rv32im_guest::hint_store_u32!(has_sqrt.as_ptr() as *const u32);
if has_sqrt.assume_init() == 0 {
return None;
}
}
// read actual square root value
let sqrt = core::mem::MaybeUninit::<[u32; 10]>::uninit();
unsafe {
// One word per limb: 10 words cover all of `sqrt`.
// SAFETY (assumed): as above, the macro is expected to fill the whole buffer.
openvm_rv32im_guest::hint_buffer_u32!(sqrt.as_ptr() as *const u8, 10);
Some(sqrt.assume_init())
}
}

View File

@@ -0,0 +1,17 @@
[package]
name = "powdr-openvm-hints-transpiler"
version.workspace = true
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
[dependencies]
openvm-stark-backend = { workspace = true }
openvm-instructions = { workspace = true }
openvm-transpiler = { workspace = true }
openvm-instructions-derive = { workspace = true }
rrs-lib = "0.1.0"
strum = { version = "0.27", features = ["derive"] }
powdr-openvm-hints-guest = { workspace = true }

View File

@@ -0,0 +1,68 @@
use openvm_instructions::{
instruction::Instruction, riscv::RV32_REGISTER_NUM_LIMBS, LocalOpcode, PhantomDiscriminant,
};
use openvm_instructions_derive::LocalOpcode;
use openvm_stark_backend::p3_field::PrimeField32;
use openvm_transpiler::{TranspilerExtension, TranspilerOutput};
use powdr_openvm_hints_guest::{HintsFunct7, HINTS_FUNCT3, OPCODE};
use rrs_lib::instruction_formats::RType;
use strum::{EnumCount, EnumIter, FromRepr};
/// Local opcode enum for the hints extension, placed at opcode offset 0x800.
/// NOTE(review): `process_custom` below emits phantom instructions and does not reference
/// this enum — confirm whether it is used elsewhere.
#[derive(
Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, EnumCount, EnumIter, FromRepr, LocalOpcode,
)]
#[opcode_offset = 0x800]
#[repr(usize)]
pub enum HintsOpcode {
HINTS,
}
/// Phantom discriminants of the hint instructions, one per `HintsFunct7` value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, FromRepr)]
#[repr(u16)]
pub enum HintsPhantom {
// It is unclear whether there is a "proper" way to avoid conflicts between these numbers:
// the values were chosen by looking at the OpenVM code and picking the next range that
// did not seem to be used.
HintReverseBytes = 0x60,
HintK256InverseField = 0x61,
HintK256InverseField10x26 = 0x62,
HintK256SqrtField10x26 = 0x63,
}
/// Transpiler extension that turns the custom hint instructions (`OPCODE` / `HINTS_FUNCT3`)
/// into OpenVM phantom instructions.
#[derive(Default)]
pub struct HintsTranspilerExtension;
impl<F: PrimeField32> TranspilerExtension<F> for HintsTranspilerExtension {
    /// Tries to transpile the first word of `instruction_stream` as a hint instruction.
    ///
    /// Returns `None` when the stream is empty, when the opcode or `funct3` does not belong
    /// to this extension, or when `funct7` is not a known `HintsFunct7` value. Otherwise
    /// returns a single phantom instruction carrying the matching `HintsPhantom`
    /// discriminant.
    fn process_custom(&self, instruction_stream: &[u32]) -> Option<TranspilerOutput<F>> {
        let &word = instruction_stream.first()?;

        // The opcode is taken from the low 7 bits of the instruction word.
        if (word & 0x7f) as u8 != OPCODE {
            return None;
        }

        let rtype = RType::new(word);
        if rtype.funct3 as u8 != HINTS_FUNCT3 {
            return None;
        }

        let phantom = match HintsFunct7::from_repr(rtype.funct7 as u8)? {
            HintsFunct7::ReverseBytes => HintsPhantom::HintReverseBytes,
            HintsFunct7::K256InverseField => HintsPhantom::HintK256InverseField,
            HintsFunct7::K256InverseField10x26 => HintsPhantom::HintK256InverseField10x26,
            HintsFunct7::K256SqrtField10x26 => HintsPhantom::HintK256SqrtField10x26,
        };

        // First operand: the rs1 register index scaled by the number of limbs per register.
        let rs1_operand = F::from_canonical_usize(RV32_REGISTER_NUM_LIMBS * rtype.rs1);

        Some(TranspilerOutput::one_to_one(Instruction::phantom(
            PhantomDiscriminant(phantom as u16),
            rs1_operand,
            F::ZERO,
            0,
        )))
    }
}

View File

@@ -0,0 +1,15 @@
[workspace]
[package]
name = "powdr-openvm-guest-hints-test"
version = "0.0.0"
edition = "2021"
[dependencies]
# The `rev` here must point to the same version used in the workspace.
# Otherwise, there is conflict with the `powdr-openvm-hints-guest` dependency (which is part of the workspace).
openvm = { git = "https://github.com/powdr-labs/openvm.git", rev = "391b737" }
powdr-openvm-hints-guest = { path = "../extensions/hints-guest/" }
[profile.release-with-debug]
inherits = "release"
debug = true

View File

@@ -0,0 +1,10 @@
#![cfg_attr(target_os = "zkvm", no_main)]
#![cfg_attr(target_os = "zkvm", no_std)]
openvm::entry!(main);
use powdr_openvm_hints_guest::hint_reverse_bytes;
/// Guest entry point: checks that the reverse-bytes hint returns the byte-swapped input.
pub fn main() {
    assert_eq!(hint_reverse_bytes(0x11223344), 0x44332211);
}

View File

@@ -143,17 +143,14 @@ impl<'a, F: PrimeField32> Program<Instr<F>> for Prog<'a, F> {
}
pub fn customize(
OriginalCompiledProgram {
mut exe,
sdk_vm_config,
}: OriginalCompiledProgram,
OriginalCompiledProgram { mut exe, vm_config }: OriginalCompiledProgram,
labels: &BTreeSet<u32>,
debug_info: &DebugInfo,
config: PowdrConfig,
implementation: PrecompileImplementation,
pgo_config: PgoConfig,
) -> CompiledProgram {
let original_config = OriginalVmConfig::new(sdk_vm_config.clone());
let original_config = OriginalVmConfig::new(vm_config.clone());
let airs = original_config.airs().expect("Failed to convert the AIR of an OpenVM instruction, even after filtering by the blacklist!");
let bus_map = original_config.bus_map();

View File

@@ -5,12 +5,14 @@ use crate::air_builder::AirKeygenBuilder;
use crate::bus_map::{BusMap, OpenVmBusType};
use crate::opcode::branch_opcodes_set;
use crate::{opcode::instruction_allowlist, BabyBearSC, SpecializedConfig};
use crate::{AirMetrics, Instr, SpecializedExecutor, APP_LOG_BLOWUP};
use crate::{
AirMetrics, ExtendedVmConfig, ExtendedVmConfigExecutor, ExtendedVmConfigPeriphery, Instr,
SpecializedExecutor, APP_LOG_BLOWUP,
};
use openvm_circuit::arch::{VmChipComplex, VmConfig, VmInventoryError};
use openvm_circuit_primitives::bitwise_op_lookup::SharedBitwiseOperationLookupChip;
use openvm_circuit_primitives::range_tuple::SharedRangeTupleCheckerChip;
use openvm_instructions::VmOpcode;
use openvm_sdk::config::{SdkVmConfig, SdkVmConfigExecutor, SdkVmConfigPeriphery};
use openvm_stark_backend::air_builders::symbolic::SymbolicRapBuilder;
use openvm_stark_backend::interaction::fri_log_up::find_interaction_chunks;
use openvm_stark_backend::{
@@ -109,8 +111,13 @@ fn to_option<T>(mut v: Vec<T>) -> Option<T> {
}
/// A lazy chip complex that is initialized on the first access
type LazyChipComplex =
Option<VmChipComplex<BabyBear, SdkVmConfigExecutor<BabyBear>, SdkVmConfigPeriphery<BabyBear>>>;
type LazyChipComplex = Option<
VmChipComplex<
BabyBear,
ExtendedVmConfigExecutor<BabyBear>,
ExtendedVmConfigPeriphery<BabyBear>,
>,
>;
/// A shared and mutable reference to a `LazyChipComplex`.
type CachedChipComplex = Arc<Mutex<LazyChipComplex>>;
@@ -121,8 +128,11 @@ pub struct ChipComplexGuard<'a> {
}
impl<'a> Deref for ChipComplexGuard<'a> {
type Target =
VmChipComplex<BabyBear, SdkVmConfigExecutor<BabyBear>, SdkVmConfigPeriphery<BabyBear>>;
type Target = VmChipComplex<
BabyBear,
ExtendedVmConfigExecutor<BabyBear>,
ExtendedVmConfigPeriphery<BabyBear>,
>;
fn deref(&self) -> &Self::Target {
// Unwrap is safe here because we ensure that the chip complex is initialized
@@ -132,27 +142,27 @@ impl<'a> Deref for ChipComplexGuard<'a> {
}
}
/// A wrapper around the `SdkVmConfig` that caches a chip complex.
/// A wrapper around the `ExtendedVmConfig` that caches a chip complex.
#[derive(Serialize, Deserialize, Clone)]
pub struct OriginalVmConfig {
sdk_config: SdkVmConfig,
sdk_config: ExtendedVmConfig,
#[serde(skip)]
chip_complex: CachedChipComplex,
}
impl OriginalVmConfig {
pub fn new(sdk_config: SdkVmConfig) -> Self {
pub fn new(sdk_config: ExtendedVmConfig) -> Self {
Self {
sdk_config,
chip_complex: Default::default(),
}
}
pub fn config(&self) -> &SdkVmConfig {
pub fn config(&self) -> &ExtendedVmConfig {
&self.sdk_config
}
pub fn config_mut(&mut self) -> &mut SdkVmConfig {
pub fn config_mut(&mut self) -> &mut ExtendedVmConfig {
let mut guard = self.chip_complex.lock().expect("Mutex poisoned");
*guard = None; // Invalidate cache
&mut self.sdk_config
@@ -268,7 +278,11 @@ impl OriginalVmConfig {
pub fn create_chip_complex(
&self,
) -> Result<
VmChipComplex<BabyBear, SdkVmConfigExecutor<BabyBear>, SdkVmConfigPeriphery<BabyBear>>,
VmChipComplex<
BabyBear,
ExtendedVmConfigExecutor<BabyBear>,
ExtendedVmConfigPeriphery<BabyBear>,
>,
VmInventoryError,
> {
// Clear the cache
@@ -490,7 +504,7 @@ mod tests {
use openvm_ecc_circuit::{WeierstrassExtension, SECP256K1_CONFIG};
use openvm_pairing_circuit::{PairingCurve, PairingExtension};
use openvm_rv32im_circuit::Rv32M;
use openvm_sdk::config::SdkSystemConfig;
use openvm_sdk::config::{SdkSystemConfig, SdkVmConfig};
#[test]
fn test_get_bus_map() {
@@ -524,7 +538,7 @@ mod tests {
supported_curves.push(bls_config.clone());
supported_pairing_curves.push(PairingCurve::Bls12_381);
}
let vm_config = SdkVmConfig::builder()
let sdk_vm_config = SdkVmConfig::builder()
.system(system_config.into())
.rv32i(Default::default())
.rv32m(rv32m)
@@ -538,17 +552,18 @@ mod tests {
.pairing(PairingExtension::new(supported_pairing_curves))
.build();
let _ = OriginalVmConfig::new(vm_config).bus_map();
let _ = OriginalVmConfig::new(ExtendedVmConfig { sdk_vm_config }).bus_map();
}
#[test]
fn test_export_pil() {
let writer = &mut Vec::new();
let base_config = OriginalVmConfig::new(
SdkVmConfig::builder()
let ext_config = ExtendedVmConfig {
sdk_vm_config: SdkVmConfig::builder()
.system(SdkSystemConfig::default())
.build(),
);
};
let base_config = OriginalVmConfig::new(ext_config);
let specialized_config = SpecializedConfig::new(
base_config,
vec![],

View File

@@ -27,6 +27,8 @@ use openvm_stark_sdk::openvm_stark_backend::p3_field::PrimeField32;
use openvm_stark_sdk::p3_baby_bear::BabyBear;
use powdr_autoprecompiles::{execution_profile::execution_profile, PowdrConfig};
use powdr_extension::{PowdrExecutor, PowdrExtension, PowdrPeriphery};
use powdr_openvm_hints_circuit::{HintsExecutor, HintsExtension, HintsPeriphery};
use powdr_openvm_hints_transpiler::HintsTranspilerExtension;
use serde::{Deserialize, Serialize};
use std::cmp::Reverse;
use std::fs::File;
@@ -145,7 +147,7 @@ impl InitFileGenerator for SpecializedConfig {
#[derive(ChipUsageGetter, From, AnyEnum, InstructionExecutor, Chip)]
pub enum SpecializedExecutor<F: PrimeField32> {
#[any_enum]
SdkExecutor(SdkVmConfigExecutor<F>),
SdkExecutor(ExtendedVmConfigExecutor<F>),
#[any_enum]
PowdrExecutor(PowdrExecutor<F>),
}
@@ -153,7 +155,7 @@ pub enum SpecializedExecutor<F: PrimeField32> {
#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
pub enum MyPeriphery<F: PrimeField32> {
#[any_enum]
SdkPeriphery(SdkVmConfigPeriphery<F>),
SdkPeriphery(ExtendedVmConfigPeriphery<F>),
#[any_enum]
PowdrPeriphery(PowdrPeriphery<F>),
}
@@ -263,10 +265,17 @@ pub fn compile_openvm(
Default::default(),
)?;
// Transpile the ELF into a VmExe. Note that this happens using the sdk transpiler only, our extension does not use a transpiler.
let exe = sdk.transpile(elf, sdk_vm_config.transpiler())?;
// Transpile the ELF into a VmExe.
let mut transpiler = sdk_vm_config.transpiler();
Ok(OriginalCompiledProgram { exe, sdk_vm_config })
// Add our custom transpiler extensions
transpiler = transpiler.with_extension(HintsTranspilerExtension {});
let exe = sdk.transpile(elf, transpiler)?;
let vm_config = ExtendedVmConfig { sdk_vm_config };
Ok(OriginalCompiledProgram { exe, vm_config })
}
/// Determines how the precompile (a circuit with algebraic gates and bus interactions)
@@ -409,9 +418,72 @@ pub struct CompiledProgram {
/// A transpiled guest program together with the VM configuration it was compiled for
/// (as returned by `compile_openvm`).
#[derive(Clone)]
pub struct OriginalCompiledProgram {
/// The transpiled executable.
pub exe: VmExe<BabyBear>,
/// The SDK configuration extended with the custom extensions.
pub vm_config: ExtendedVmConfig,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
/// `SdkVmConfig` plus custom OpenVM extensions, before autoprecompile transformations.
/// For now, only includes custom hints.
pub struct ExtendedVmConfig {
/// The wrapped OpenVM SDK configuration.
pub sdk_vm_config: SdkVmConfig,
}
impl VmConfig<BabyBear> for ExtendedVmConfig {
type Executor = ExtendedVmConfigExecutor<BabyBear>;
type Periphery = ExtendedVmConfigPeriphery<BabyBear>;
/// Returns the system configuration stored inside the wrapped `SdkVmConfig`.
fn system(&self) -> &SystemConfig {
&self.sdk_vm_config.system.config
}
/// Mutable access to the system configuration stored inside the wrapped `SdkVmConfig`.
fn system_mut(&mut self) -> &mut SystemConfig {
&mut self.sdk_vm_config.system.config
}
/// Builds the chip complex of the wrapped `SdkVmConfig` and extends it with
/// `HintsExtension`. Errors from either step are propagated.
fn create_chip_complex(
&self,
) -> std::result::Result<
VmChipComplex<BabyBear, Self::Executor, Self::Periphery>,
VmInventoryError,
> {
// `transmute` converts the SDK complex to the extended executor/periphery enums.
// NOTE(review): its target types appear to be inferred from the reassignment and the
// return type below, so keep this statement structure when refactoring.
let mut complex = self.sdk_vm_config.create_chip_complex()?.transmute();
complex = complex.extend(&HintsExtension)?;
Ok(complex)
}
}
/// Init-file generation is delegated unchanged to the wrapped `SdkVmConfig`.
impl InitFileGenerator for ExtendedVmConfig {
fn generate_init_file_contents(&self) -> Option<String> {
self.sdk_vm_config.generate_init_file_contents()
}
fn write_to_init_file(
&self,
manifest_dir: &Path,
init_file_name: Option<&str>,
) -> eyre::Result<()> {
self.sdk_vm_config
.write_to_init_file(manifest_dir, init_file_name)
}
}
/// Executor enum of `ExtendedVmConfig`: either an executor of the wrapped SDK configuration
/// or one of the hints extension.
#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
#[allow(clippy::large_enum_variant)]
pub enum ExtendedVmConfigExecutor<F: PrimeField32> {
#[any_enum]
Sdk(SdkVmConfigExecutor<F>),
#[any_enum]
Hints(HintsExecutor<F>),
}
/// Periphery enum of `ExtendedVmConfig`: either a periphery chip of the wrapped SDK
/// configuration or one of the hints extension.
#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
pub enum ExtendedVmConfigPeriphery<F: PrimeField32> {
#[any_enum]
Sdk(SdkVmConfigPeriphery<F>),
#[any_enum]
Hints(HintsPeriphery<F>),
}
#[derive(Clone, Serialize, Deserialize, Default, Debug, Eq, PartialEq)]
pub struct AirMetrics {
pub widths: AirWidths,
@@ -520,6 +592,7 @@ pub fn prove(
vm_config
.sdk_config
.config_mut()
.sdk_vm_config
.system
.config
.segmentation_strategy = Arc::new(
@@ -613,14 +686,14 @@ pub fn execution_profile_from_guest(
guest_opts: GuestOptions,
inputs: StdIn,
) -> HashMap<u64, u32> {
let OriginalCompiledProgram { exe, sdk_vm_config } = compile_openvm(guest, guest_opts).unwrap();
let OriginalCompiledProgram { exe, vm_config } = compile_openvm(guest, guest_opts).unwrap();
let program = Prog::from(&exe.program);
// prepare for execute
let sdk = Sdk::default();
execution_profile::<BabyBearOpenVmApcAdapter>(&program, || {
sdk.execute(exe.clone(), sdk_vm_config.clone(), inputs.clone())
sdk.execute(exe.clone(), vm_config.clone(), inputs.clone())
.unwrap();
})
}
@@ -740,6 +813,8 @@ mod tests {
const GUEST_SHA256_APC_PGO_LARGE: u64 = 50;
const GUEST_SHA256_SKIP: u64 = 0;
const GUEST_HINTS_TEST: &str = "guest-hints-test";
#[test]
fn guest_prove_simple() {
let mut stdin = StdIn::default();
@@ -1168,6 +1243,23 @@ mod tests {
);
}
#[test]
/// Checks that the hints test guest compiles and proves successfully.
fn hints_test_prove() {
    let mut stdin = StdIn::default();
    stdin.write(&GUEST_HINTS_TEST);
    // No autoprecompiles: this test only covers the custom hint extension.
    let config = default_powdr_openvm_config(0, 0);
    // Must be the hints guest: it is the guest that executes the custom hint instruction.
    prove_simple(
        GUEST_HINTS_TEST,
        config,
        PrecompileImplementation::SingleRowChip,
        stdin,
        PgoConfig::None,
        None,
    );
}
// #[test]
// #[ignore = "Too much RAM"]
// // TODO: This test currently panics because the kzg params are not set up correctly. Fix this.

View File

@@ -7,7 +7,7 @@ use std::{
use crate::{
extraction_utils::OriginalAirs, powdr_extension::executor::PowdrPeripheryInstances,
utils::algebraic_to_symbolic,
utils::algebraic_to_symbolic, ExtendedVmConfig,
};
use super::{executor::PowdrExecutor, opcode::PowdrOpcode, PowdrPrecompile};
@@ -18,7 +18,6 @@ use openvm_circuit::{
system::memory::OfflineMemory,
};
use openvm_instructions::{instruction::Instruction, LocalOpcode};
use openvm_sdk::config::SdkVmConfig;
use openvm_stark_backend::{
air_builders::symbolic::{
symbolic_expression::{SymbolicEvaluator, SymbolicExpression},
@@ -54,7 +53,7 @@ impl<F: PrimeField32> PowdrChip<F> {
precompile: PowdrPrecompile<F>,
original_airs: OriginalAirs<F>,
memory: Arc<Mutex<OfflineMemory<F>>>,
base_config: SdkVmConfig,
base_config: ExtendedVmConfig,
periphery: PowdrPeripheryInstances,
) -> Self {
let PowdrPrecompile {

View File

@@ -8,9 +8,10 @@ use openvm_circuit_primitives::{
bitwise_op_lookup::SharedBitwiseOperationLookupChip, range_tuple::SharedRangeTupleCheckerChip,
var_range::SharedVariableRangeCheckerChip, Chip, ChipUsageGetter,
};
use openvm_sdk::config::{SdkVmConfigExecutor, SdkVmConfigPeriphery};
use openvm_stark_backend::p3_field::PrimeField32;
use crate::{ExtendedVmConfigExecutor, ExtendedVmConfigPeriphery};
/// A dummy inventory used for execution of autoprecompiles
/// It extends the `ExtendedVmConfigExecutor` and `ExtendedVmConfigPeriphery`, providing them with shared, pre-loaded periphery chips to avoid memory allocations by each SDK chip
pub type DummyInventory<F> = VmInventory<DummyExecutor<F>, DummyPeriphery<F>>;
@@ -20,7 +21,7 @@ pub type DummyChipComplex<F> = VmChipComplex<F, DummyExecutor<F>, DummyPeriphery
#[derive(ChipUsageGetter, Chip, InstructionExecutor, AnyEnum, From)]
pub enum DummyExecutor<F: PrimeField32> {
#[any_enum]
Sdk(SdkVmConfigExecutor<F>),
Sdk(ExtendedVmConfigExecutor<F>),
#[any_enum]
Shared(SharedExecutor<F>),
#[any_enum]
@@ -32,7 +33,7 @@ pub enum DummyExecutor<F: PrimeField32> {
#[derive(ChipUsageGetter, Chip, AnyEnum, From)]
pub enum DummyPeriphery<F: PrimeField32> {
#[any_enum]
Sdk(SdkVmConfigPeriphery<F>),
Sdk(ExtendedVmConfigPeriphery<F>),
#[any_enum]
Shared(SharedPeriphery<F>),
#[any_enum]
@@ -75,6 +76,11 @@ mod from_implementations {
Rv32MPeriphery,
};
use openvm_sha256_circuit::{Sha256Executor, Sha256Periphery};
use powdr_openvm_hints_circuit::HintsExecutor;
use powdr_openvm_hints_circuit::HintsPeriphery;
use crate::ExtendedVmConfigExecutor;
use crate::ExtendedVmConfigPeriphery;
/// Defines `From<T> for DummyExecutor` and `From<T> for DummyPeriphery`
/// by mapping to the appropriate `SdkVmConfigExecutor` and `SdkVmConfigPeriphery` variant.
@@ -84,19 +90,31 @@ mod from_implementations {
$(
impl<F: PrimeField32> From<$executor_ty> for DummyExecutor<F> {
fn from(executor: $executor_ty) -> Self {
DummyExecutor::Sdk(SdkVmConfigExecutor::$variant(executor))
DummyExecutor::Sdk(ExtendedVmConfigExecutor::Sdk(SdkVmConfigExecutor::$variant(executor)))
}
}
impl<F: PrimeField32> From<$periphery_ty> for DummyPeriphery<F> {
fn from(periphery: $periphery_ty) -> Self {
DummyPeriphery::Sdk(SdkVmConfigPeriphery::$variant(periphery))
DummyPeriphery::Sdk(ExtendedVmConfigPeriphery::Sdk(SdkVmConfigPeriphery::$variant(periphery)))
}
}
)*
};
}
/// Wraps a hints executor as `DummyExecutor::Sdk(ExtendedVmConfigExecutor::Hints(..))`.
impl<F: PrimeField32> From<HintsExecutor<F>> for DummyExecutor<F> {
    fn from(hints_executor: HintsExecutor<F>) -> Self {
        Self::Sdk(ExtendedVmConfigExecutor::Hints(hints_executor))
    }
}
/// Wraps a hints periphery chip as `DummyPeriphery::Sdk(ExtendedVmConfigPeriphery::Hints(..))`.
impl<F: PrimeField32> From<HintsPeriphery<F>> for DummyPeriphery<F> {
    fn from(periphery: HintsPeriphery<F>) -> Self {
        Self::Sdk(ExtendedVmConfigPeriphery::Hints(periphery))
    }
}
impl_zero_cost_conversions!(
(Rv32i, Rv32IExecutor<F>, Rv32IPeriphery<F>),
(Io, Rv32IoExecutor<F>, Rv32IoPeriphery<F>),

View File

@@ -9,7 +9,7 @@ use crate::{
inventory::{DummyChipComplex, DummyInventory},
periphery::SharedPeripheryChips,
},
Instr,
ExtendedVmConfig, Instr,
};
use super::{
@@ -28,7 +28,6 @@ use openvm_circuit::{
},
};
use openvm_native_circuit::CastFExtension;
use openvm_sdk::config::SdkVmConfig;
use openvm_stark_backend::{
p3_field::FieldAlgebra, p3_matrix::Matrix, p3_maybe_rayon::prelude::ParallelIterator,
};
@@ -55,6 +54,7 @@ mod inventory;
mod periphery;
pub use periphery::PowdrPeripheryInstances;
use powdr_openvm_hints_circuit::HintsExtension;
/// A struct which holds the state of the execution based on the original instructions in this block and a dummy inventory.
pub struct PowdrExecutor<F: PrimeField32> {
@@ -72,7 +72,7 @@ impl<F: PrimeField32> PowdrExecutor<F> {
air_by_opcode_id: OriginalAirs<F>,
is_valid_column: AlgebraicReference,
memory: Arc<Mutex<OfflineMemory<F>>>,
base_config: SdkVmConfig,
base_config: ExtendedVmConfig,
periphery: PowdrPeripheryInstances,
) -> Self {
Self {
@@ -388,7 +388,7 @@ fn global_index<F>(
fn create_chip_complex_with_memory<F: PrimeField32>(
memory: Arc<Mutex<OfflineMemory<F>>>,
shared_chips: SharedPeripheryChips,
base_config: SdkVmConfig,
base_config: ExtendedVmConfig,
) -> std::result::Result<DummyChipComplex<F>, VmInventoryError> {
use openvm_keccak256_circuit::Keccak256;
use openvm_native_circuit::Native;
@@ -396,7 +396,12 @@ fn create_chip_complex_with_memory<F: PrimeField32>(
use openvm_sha256_circuit::Sha256;
let this = base_config;
let mut complex: DummyChipComplex<F> = this.system.config.create_chip_complex()?.transmute();
let mut complex: DummyChipComplex<F> = this
.sdk_vm_config
.system
.config
.create_chip_complex()?
.transmute();
// CHANGE: inject the correct memory here to be passed to the chips, to be accessible in their get_proof_input
complex.base.memory_controller.offline_memory = memory.clone();
@@ -407,28 +412,28 @@ fn create_chip_complex_with_memory<F: PrimeField32>(
complex = complex.extend(&shared_chips)?;
// END CHANGE
if this.rv32i.is_some() {
if this.sdk_vm_config.rv32i.is_some() {
complex = complex.extend(&Rv32I)?;
}
if this.io.is_some() {
if this.sdk_vm_config.io.is_some() {
complex = complex.extend(&Rv32Io)?;
}
if this.keccak.is_some() {
if this.sdk_vm_config.keccak.is_some() {
complex = complex.extend(&Keccak256)?;
}
if this.sha256.is_some() {
if this.sdk_vm_config.sha256.is_some() {
complex = complex.extend(&Sha256)?;
}
if this.native.is_some() {
if this.sdk_vm_config.native.is_some() {
complex = complex.extend(&Native)?;
}
if this.castf.is_some() {
if this.sdk_vm_config.castf.is_some() {
complex = complex.extend(&CastFExtension)?;
}
if let Some(rv32m) = this.rv32m {
if let Some(rv32m) = this.sdk_vm_config.rv32m {
let mut rv32m = rv32m;
if let Some(ref bigint) = this.bigint {
if let Some(ref bigint) = this.sdk_vm_config.bigint {
rv32m.range_tuple_checker_sizes[0] =
rv32m.range_tuple_checker_sizes[0].max(bigint.range_tuple_checker_sizes[0]);
rv32m.range_tuple_checker_sizes[1] =
@@ -436,9 +441,9 @@ fn create_chip_complex_with_memory<F: PrimeField32>(
}
complex = complex.extend(&rv32m)?;
}
if let Some(bigint) = this.bigint {
if let Some(bigint) = this.sdk_vm_config.bigint {
let mut bigint = bigint;
if let Some(ref rv32m) = this.rv32m {
if let Some(ref rv32m) = this.sdk_vm_config.rv32m {
bigint.range_tuple_checker_sizes[0] =
rv32m.range_tuple_checker_sizes[0].max(bigint.range_tuple_checker_sizes[0]);
bigint.range_tuple_checker_sizes[1] =
@@ -446,18 +451,21 @@ fn create_chip_complex_with_memory<F: PrimeField32>(
}
complex = complex.extend(&bigint)?;
}
if let Some(ref modular) = this.modular {
if let Some(ref modular) = this.sdk_vm_config.modular {
complex = complex.extend(modular)?;
}
if let Some(ref fp2) = this.fp2 {
if let Some(ref fp2) = this.sdk_vm_config.fp2 {
complex = complex.extend(fp2)?;
}
if let Some(ref pairing) = this.pairing {
if let Some(ref pairing) = this.sdk_vm_config.pairing {
complex = complex.extend(pairing)?;
}
if let Some(ref ecc) = this.ecc {
if let Some(ref ecc) = this.sdk_vm_config.ecc {
complex = complex.extend(ecc)?;
}
// add custom extensions
complex = complex.extend(&HintsExtension)?;
Ok(complex)
}

View File

@@ -11,6 +11,7 @@ use crate::powdr_extension::plonk::air::PlonkColumns;
use crate::powdr_extension::plonk::copy_constraint::generate_permutation_columns;
use crate::powdr_extension::PowdrOpcode;
use crate::powdr_extension::PowdrPrecompile;
use crate::ExtendedVmConfig;
use itertools::Itertools;
use openvm_circuit::utils::next_power_of_two_or_zero;
use openvm_circuit::{
@@ -19,7 +20,6 @@ use openvm_circuit::{
};
use openvm_instructions::instruction::Instruction;
use openvm_instructions::LocalOpcode;
use openvm_sdk::config::SdkVmConfig;
use openvm_stark_backend::p3_air::BaseAir;
use openvm_stark_backend::p3_field::FieldAlgebra;
use openvm_stark_backend::p3_matrix::dense::RowMajorMatrix;
@@ -51,7 +51,7 @@ impl<F: PrimeField32> PlonkChip<F> {
precompile: PowdrPrecompile<F>,
original_airs: OriginalAirs<F>,
memory: Arc<Mutex<OfflineMemory<F>>>,
base_config: SdkVmConfig,
base_config: ExtendedVmConfig,
periphery: PowdrPeripheryInstances,
bus_map: BusMap,
copy_constraint_bus_id: u16,

View File

@@ -22,7 +22,6 @@ use openvm_circuit_primitives::range_tuple::SharedRangeTupleCheckerChip;
use openvm_circuit_primitives::var_range::SharedVariableRangeCheckerChip;
use openvm_instructions::VmOpcode;
use openvm_instructions::{instruction::Instruction, LocalOpcode};
use openvm_sdk::config::{SdkVmConfig, SdkVmConfigPeriphery};
use openvm_stark_backend::{
p3_field::{Field, PrimeField32},
ChipUsageGetter,
@@ -30,7 +29,7 @@ use openvm_stark_backend::{
use powdr_autoprecompiles::SymbolicMachine;
use serde::{Deserialize, Serialize};
use crate::PrecompileImplementation;
use crate::{ExtendedVmConfig, ExtendedVmConfigPeriphery, PrecompileImplementation};
use super::plonk::chip::PlonkChip;
use super::{chip::PowdrChip, PowdrOpcode};
@@ -39,7 +38,7 @@ use super::{chip::PowdrChip, PowdrOpcode};
#[serde(bound = "F: Field")]
pub struct PowdrExtension<F> {
pub precompiles: Vec<PowdrPrecompile<F>>,
pub base_config: SdkVmConfig,
pub base_config: ExtendedVmConfig,
pub implementation: PrecompileImplementation,
pub bus_map: BusMap,
pub airs: OriginalAirs<F>,
@@ -102,7 +101,7 @@ impl<F> PowdrPrecompile<F> {
impl<F> PowdrExtension<F> {
pub fn new(
precompiles: Vec<PowdrPrecompile<F>>,
base_config: SdkVmConfig,
base_config: ExtendedVmConfig,
implementation: PrecompileImplementation,
bus_map: BusMap,
airs: OriginalAirs<F>,
@@ -135,7 +134,7 @@ impl<F: PrimeField32> PowdrExecutor<F> {
#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
pub enum PowdrPeriphery<F: PrimeField32> {
Sdk(SdkVmConfigPeriphery<F>),
Sdk(ExtendedVmConfigPeriphery<F>),
Phantom(PhantomChip<F>),
}

View File

@@ -8,6 +8,7 @@ use powdr_openvm::bus_interaction_handler::OpenVmBusInteractionHandler;
use powdr_openvm::extraction_utils::OriginalVmConfig;
use powdr_openvm::instruction_formatter::openvm_instruction_formatter;
use powdr_openvm::BabyBearOpenVmApcAdapter;
use powdr_openvm::ExtendedVmConfig;
use powdr_openvm::Instr;
use powdr_openvm::{bus_map::default_openvm_bus_map, OPENVM_DEGREE_BOUND};
use pretty_assertions::assert_eq;
@@ -23,7 +24,9 @@ fn compile(basic_block: Vec<Instruction<BabyBear>>) -> String {
.io(Default::default())
.build();
let original_config = OriginalVmConfig::new(sdk_vm_config);
let ext_vm_config = ExtendedVmConfig { sdk_vm_config };
let original_config = OriginalVmConfig::new(ext_vm_config);
let airs = original_config.airs().unwrap();
let bus_map = original_config.bus_map();