Generalize sorted lookup.

This commit is contained in:
chriseth
2023-03-08 18:14:57 +01:00
parent b046d7d03c
commit 41dc08e01d
9 changed files with 329 additions and 96 deletions

View File

@@ -4,7 +4,7 @@ use crate::number::{format_number, is_zero, AbstractNumberType, GOLDILOCKS_MOD};
use super::util::WitnessColumnNamer;
/// An expression affine in the committed polynomials.
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone)]
pub struct AffineExpression {
pub coefficients: Vec<AbstractNumberType>,
pub offset: AbstractNumberType,
@@ -37,7 +37,7 @@ impl AffineExpression {
}
pub fn is_constant(&self) -> bool {
self.coefficients.iter().all(is_zero)
self.nonzero_coefficients().next().is_none()
}
pub fn constant_value(&self) -> Option<AbstractNumberType> {
@@ -48,6 +48,18 @@ impl AffineExpression {
}
}
pub fn nonzero_variables(&self) -> Vec<usize> {
self.nonzero_coefficients().map(|(i, _)| i).collect()
}
/// @returns an iterator of the nonzero coefficients and their variable IDs (but not the offset).
pub fn nonzero_coefficients(&self) -> impl Iterator<Item = (usize, &AbstractNumberType)> {
self.coefficients
.iter()
.enumerate()
.filter(|(_, c)| !is_zero(c))
}
pub fn mul(mut self, factor: AbstractNumberType) -> AffineExpression {
let fac = clamp(factor);
for f in &mut self.coefficients {
@@ -61,11 +73,7 @@ impl AffineExpression {
/// returns the index of the variable and the assignment that evaluates the
/// affine expression to zero.
pub fn solve(&self) -> Option<(usize, AbstractNumberType)> {
let mut nonzero = self
.coefficients
.iter()
.enumerate()
.filter(|(_, c)| !is_zero(c));
let mut nonzero = self.nonzero_coefficients();
nonzero.next().and_then(|(i, c)| {
if nonzero.next().is_none() {
// c * a + o = 0 <=> a = -o/c
@@ -93,14 +101,11 @@ impl AffineExpression {
}
pub fn format(&self, namer: &impl WitnessColumnNamer) -> String {
self.coefficients
.iter()
.enumerate()
.filter(|(_, c)| !is_zero(c))
self.nonzero_coefficients()
.map(|(i, c)| {
let name = namer.name(i);
if *c == 1.into() {
name.to_string()
name
} else if *c == (-1).into() {
format!("-{name}")
} else {
@@ -144,6 +149,12 @@ fn inv(x: AbstractNumberType, m: AbstractNumberType) -> AbstractNumberType {
pow(x, m.clone() - 2, m)
}
impl PartialEq for AffineExpression {
fn eq(&self, other: &Self) -> bool {
self.offset == other.offset && self.nonzero_coefficients().eq(other.nonzero_coefficients())
}
}
impl std::ops::Add for AffineExpression {
type Output = AffineExpression;

View File

@@ -0,0 +1,43 @@
use super::affine_expression::AffineExpression;
use super::eval_error::EvalError;
use super::expression_evaluator::SymbolicVariables;
use super::FixedData;
/// Evaluates only fixed columns on a specific row.
pub struct FixedEvaluator<'a> {
fixed_data: &'a FixedData<'a>,
row: usize,
}
impl<'a> FixedEvaluator<'a> {
pub fn new(fixed_data: &'a FixedData<'a>, row: usize) -> Self {
FixedEvaluator { fixed_data, row }
}
}
impl<'a> SymbolicVariables for FixedEvaluator<'a> {
fn constant(&self, name: &str) -> Result<AffineExpression, EvalError> {
Ok(self.fixed_data.constants[name].clone().into())
}
fn value(&self, name: &str, next: bool) -> Result<AffineExpression, EvalError> {
// TODO arrays
if let Some(col_data) = self.fixed_data.fixed_cols.get(name) {
let degree = col_data.len();
let row = if next {
(self.row + 1) % degree
} else {
self.row
};
Ok(col_data[row].clone().into())
} else {
Err("Can only accesss fixed columns in the fixed evaluator."
.to_string()
.into())
}
}
fn format(&self, expr: AffineExpression) -> String {
expr.format(self.fixed_data)
}
}

View File

@@ -45,7 +45,6 @@ impl Machine for FixedLookup {
.iter()
.any(|e| contains_witness_ref(e, fixed_data))
{
println!("NA");
return Ok(LookupReturn::NotApplicable);
}

View File

@@ -16,7 +16,7 @@ pub trait Machine {
// witness_names: HashSet<&'a str>,
// ) -> Option<Box<Self>>;
/// Process o plookup. Not all values on the LHS need to be available.
/// Process a plookup. Not all values on the LHS need to be available.
/// Can update internal data.
fn process_plookup(
&mut self,

View File

@@ -47,9 +47,12 @@ pub fn split_out_machines<'a>(
// TODO we probably nede to check that machine witnesses do not appear
// in any identity among `identities` except on the RHS.
let machine = SortedWitnesses::try_new(fixed, &machine_identities, &machine_witnesses);
(vec![machine.unwrap(), fixed_lookup], identities)
if let Some(machine) = SortedWitnesses::try_new(fixed, &machine_identities, &machine_witnesses)
{
(vec![machine, fixed_lookup], identities)
} else {
(vec![fixed_lookup], identities)
}
}
/// Extracts all references to any of the given names

View File

@@ -10,10 +10,12 @@ mod affine_expression;
mod eval_error;
mod evaluator;
mod expression_evaluator;
pub mod fixed_evaluator;
mod fixed_lookup_machine;
mod machine;
mod machine_extractor;
mod sorted_witness_machine;
pub mod symbolic_evaluator;
mod util;
/// Generates the committed polynomial values
@@ -104,8 +106,8 @@ impl<'a> FixedData<'a> {
}
impl<'a> WitnessColumnNamer for FixedData<'a> {
fn name(&self, i: usize) -> &str {
self.witness_cols[i].name
fn name(&self, i: usize) -> String {
self.witness_cols[i].name.to_string()
}
}

View File

@@ -1,49 +1,122 @@
use std::collections::{btree_map::Entry, BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, HashMap, HashSet};
use crate::analyzer::{Identity, SelectedExpressions};
use itertools::{Either, Itertools};
use crate::analyzer::{Expression, Identity, IdentityKind, SelectedExpressions};
use crate::commit_evaluator::eval_error;
use crate::commit_evaluator::machine::LookupReturn;
use crate::number::AbstractNumberType;
use super::affine_expression::AffineExpression;
use super::eval_error::EvalError;
use super::expression_evaluator::ExpressionEvaluator;
use super::fixed_evaluator::FixedEvaluator;
use super::machine::{LookupResult, Machine};
use super::symbolic_evaluator::SymbolicEvaluator;
use super::FixedData;
/// A machine that can support a lookup in a set of columns that are sorted
/// by one specific column and values in that column have to be unique.
/// This means there is a column A and a constraint of the form
/// NOTLAST { A' - A } in { POSITIVE }
/// Where
/// - NOTLAST is zero only on the last row
/// - POSITIVE has all values from 1 to half of the field size.
pub struct SortedWitnesses {
/// These are the identities only concerning the machine columns.
_identities: Vec<Identity>,
/// Maps the witness polynomial names to their IDs internal to this component
/// and optional parameter and query string.
//witness_cols: BTreeMap<&'a str, &'a WitnessColumn<'a>>,
witness_names: HashSet<String>,
// TODO this is specific to the sorted write-once memory machine.
data: BTreeMap<AbstractNumberType, AbstractNumberType>,
key_col: String,
/// Position of the witness columns in the data.
/// The key column has a position of usize::max
witness_positions: HashMap<String, usize>,
data: BTreeMap<AbstractNumberType, Vec<Option<AbstractNumberType>>>,
}
impl SortedWitnesses {
pub fn try_new(
_fixed_data: &FixedData,
fixed_data: &FixedData,
identities: &[&Identity],
witness_names: &HashSet<&str>,
) -> Option<Box<Self>> {
// TODO we should check the identities and select certain machines
// based on patterns (and just return an object that implements a trait).
// Now we just assume that we have two columns
// that are sorted by a column called "m_addr"
if identities.len() != 1 {
return None;
}
check_identity(fixed_data, identities.first().unwrap()).map(|key_col| {
let witness_positions = witness_names
.iter()
.filter_map(|&w| {
if w == key_col {
None
} else {
Some(w.to_string())
}
})
.enumerate()
.map(|(i, x)| (x, i))
.collect();
Some(Box::new(SortedWitnesses {
_identities: identities.iter().map(|&i| i.clone()).collect(),
witness_names: witness_names.iter().map(|&w| w.to_string()).collect(),
data: Default::default(),
}))
Box::new(SortedWitnesses {
key_col: key_col.to_string(),
witness_positions,
data: Default::default(),
})
})
}
}
fn check_identity<'a>(fixed_data: &'a FixedData, id: &Identity) -> Option<&'a str> {
// Looking for NOTLAST { A' - A } in { POSITIVE }
if id.kind != IdentityKind::Plookup
|| id.right.selector.is_some()
|| id.left.expressions.len() != 1
{
return None;
}
// Check for A' - A in the LHS
let key_column = check_constraint(fixed_data, id.left.expressions.first().unwrap())?;
let notlast = id.left.selector.as_ref()?;
let positive = id.right.expressions.first().unwrap();
// TODO this could be rather slow. We should check the code for identity instead
// of evaluating it.
let degree = fixed_data.degree as usize;
for row in 0..(degree) {
let ev = ExpressionEvaluator::new(FixedEvaluator::new(fixed_data, row));
let nl = ev.evaluate(notlast).ok()?.constant_value()?;
if (row == degree - 1 && nl != 0.into()) || (row < degree - 1 && nl != 1.into()) {
return None;
}
let pos = ev.evaluate(positive).ok()?.constant_value()?;
if pos != (row + 1).into() {
return None;
}
}
Some(key_column)
}
/// Checks that the identity has a constraint of the form `a' - a` as the first expression
/// on the left hand side and returns the name of the witness column.
fn check_constraint<'a>(fixed_data: &'a FixedData, constraint: &Expression) -> Option<&'a str> {
let symbolic_ev = ExpressionEvaluator::new(SymbolicEvaluator::new(fixed_data));
let sort_constraint = match symbolic_ev.evaluate(constraint) {
Ok(c) => c,
Err(_) => return None,
};
let key_column_id = match sort_constraint.nonzero_variables().as_slice() {
[key, _] => *key,
_ => return None,
};
let witness_count = fixed_data.witness_cols.len();
let pattern = AffineExpression::from_wittness_poly_value(key_column_id + witness_count)
- AffineExpression::from_wittness_poly_value(key_column_id);
if sort_constraint != pattern {
return None;
}
Some(fixed_data.witness_cols[key_column_id].name)
}
impl Machine for SortedWitnesses {
/// Process a plookup. The convention is to return an empty assignment
/// if this is not a query to this machine and only return an error
/// if the identity is violated.
fn process_plookup(
&mut self,
fixed_data: &FixedData,
@@ -56,7 +129,8 @@ impl Machine for SortedWitnesses {
.iter()
.map(|e| match e {
crate::analyzer::Expression::PolynomialReference(p) => {
if self.witness_names.contains(&p.name) {
assert!(!p.next);
if p.name == self.key_col || self.witness_positions.contains_key(&p.name) {
Some(&p.name)
} else {
None
@@ -68,69 +142,115 @@ impl Machine for SortedWitnesses {
if rhs.iter().any(|e| e.is_none()) {
return Ok(LookupReturn::NotApplicable);
}
assert_eq!(rhs[0].unwrap(), &"Assembly.m_addr".to_string());
assert_eq!(rhs[1].unwrap(), &"Assembly.m_value".to_string());
match (&left[0], &left[1]) {
(Ok(a), Ok(v)) => match (a.constant_value(), v.constant_value()) {
(Some(a), Some(v)) => match self.data.entry(a.clone()) {
Entry::Vacant(e) => {
if fixed_data.verbose {
println!("Stored in memory: {a}: {v}");
}
e.insert(v);
Ok(LookupReturn::Assignments(vec![]))
}
Entry::Occupied(e) => {
if e.get() != &v {
Err(format!(
"Lookup mismatch: There is already a unique row {a}, {} (new value: {v})", e.get()).into())
} else {
Ok(LookupReturn::Assignments(vec![]))
}
}
},
(Some(a), None) => {
if let Some(value) = self.data.get(&a) {
if let Some(assignment) = (v.clone() - value.clone().into()).solve() {
if fixed_data.verbose {
println!("Read from memory: {a}: {value}");
}
Ok(LookupReturn::Assignments(vec![assignment]))
} else {
Err("Cannot solve".to_string().into())
}
let rhs = rhs.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
// Fail if the LHS has an error.
let (left, errors): (Vec<_>, Vec<_>) = left.iter().partition_map(|x| match x {
Ok(x) => Either::Left(x),
Err(x) => Either::Right(x),
});
if !errors.is_empty() {
return Err(errors
.into_iter()
.cloned()
.reduce(eval_error::combine)
.unwrap());
}
let key_index = rhs.iter().position(|&x| x == &self.key_col).unwrap();
let key_value = left[key_index].constant_value().ok_or_else(|| {
format!(
"Value of unique key must be known: {} = {}",
left[key_index].format(fixed_data),
right.expressions[key_index]
)
})?;
let mut assignments = vec![];
let stored_values = self
.data
.entry(key_value.clone())
.or_insert_with(|| vec![None; self.witness_positions.len()]);
for (&l, &r) in left.iter().zip(rhs.iter()).skip(1) {
let stored_value = &mut stored_values[self.witness_positions[r]];
match stored_value {
// There is a stored value
Some(v) => {
let constraint = l.clone() - (*v).clone().into();
if constraint.is_invalid() {
// The LHS value is known and it is differetn from the stored one.
return Err(format!(
"Lookup mismatch: There is already a unique row with {} = \
{key_value} and {r} = {v}, but wanted to store {r} = {}",
self.key_col,
l.format(fixed_data),
)
.into());
} else if constraint.constant_value() == Some(0.into()) {
// Just a repeated lookup.
} else {
Err(format!("Value at address {a} not known").into())
match constraint.solve() {
Some(assignment) => {
if fixed_data.verbose {
println!("Read {} = {key_value} -> {r} = {v}", self.key_col);
}
assignments.push(assignment);
}
None => {
return Err(
format!("Cannot solve {} = {v}", l.format(fixed_data)).into()
)
}
}
}
}
(None, _) => Err("Address must be known: <format affin eexpr> TODO"
.to_string()
.into()),
},
(Err(e), Ok(_)) | (Ok(_), Err(e)) => {
Err(format!("LHS value unknown for lookup: {e}").into())
// There is no value stored yet.
None => match l.constant_value() {
Some(v) => {
if fixed_data.verbose {
println!("Stored {} = {key_value} -> {r} = {v}", self.key_col);
}
*stored_value = Some(v);
}
None => {
return Err(format!(
"Value {r} for key {} = {key_value} not known",
self.key_col,
)
.into())
}
},
}
(Err(e1), Err(e2)) => Err(format!("LHS values unknown for lookup: {e1}, {e2}").into()),
}
Ok(LookupReturn::Assignments(assignments))
}
fn witness_col_values(
&mut self,
fixed_data: &FixedData,
) -> HashMap<String, Vec<AbstractNumberType>> {
let (mut addr, mut values): (Vec<_>, Vec<_>) =
let mut result = HashMap::new();
let (mut keys, mut values): (Vec<_>, Vec<_>) =
std::mem::take(&mut self.data).into_iter().unzip();
values.resize(fixed_data.degree as usize, 0.into());
let mut last_addr = addr.last().cloned().unwrap_or_default();
while addr.len() < fixed_data.degree as usize {
last_addr += 1;
addr.push(last_addr.clone());
let mut last_key = keys.last().cloned().unwrap_or_default();
while keys.len() < fixed_data.degree as usize {
last_key += 1;
keys.push(last_key.clone());
}
[
("Assembly.m_value".to_string(), values),
("Assembly.m_addr".to_string(), addr),
]
.into_iter()
.collect()
result.insert(self.key_col.clone(), keys);
for (col_name, &i) in &self.witness_positions {
let mut col_values = values
.iter_mut()
.map(|row| std::mem::take(&mut row[i]).unwrap_or_default())
.collect::<Vec<_>>();
col_values.resize(fixed_data.degree as usize, 0.into());
result.insert(col_name.clone(), col_values);
}
result
}
}

View File

@@ -0,0 +1,55 @@
use super::affine_expression::AffineExpression;
use super::eval_error::EvalError;
use super::expression_evaluator::SymbolicVariables;
use super::util::WitnessColumnNamer;
use super::FixedData;
/// A purely symbolic evaluator. It wil fail on fixed columns.
/// Note: The affine expressions it returns will contain variables
/// for both the "current" and the "next" row, and they are different!
/// This means these AffineExpressions should not be confused with those
/// returned by the EvaluationData struct.
pub struct SymbolicEvaluator<'a> {
fixed_data: &'a FixedData<'a>,
}
impl<'a> SymbolicEvaluator<'a> {
pub fn new(fixed_data: &'a FixedData<'a>) -> Self {
SymbolicEvaluator { fixed_data }
}
}
impl<'a> SymbolicVariables for SymbolicEvaluator<'a> {
fn constant(&self, name: &str) -> Result<AffineExpression, EvalError> {
Ok(self.fixed_data.constants[name].clone().into())
}
fn value(&self, name: &str, next: bool) -> Result<AffineExpression, EvalError> {
// TODO arrays
if let Some(id) = self.fixed_data.witness_ids.get(name) {
let witness_count = self.fixed_data.witness_ids.len();
Ok(AffineExpression::from_wittness_poly_value(
*id + if next { witness_count } else { 0 },
))
} else {
Err("Cannot access fixed columns in the symoblic evaluator."
.to_string()
.into())
}
}
fn format(&self, expr: AffineExpression) -> String {
expr.format(self)
}
}
impl<'a> WitnessColumnNamer for SymbolicEvaluator<'a> {
fn name(&self, i: usize) -> String {
let witness_count = self.fixed_data.witness_ids.len();
if i < witness_count {
self.fixed_data.name(i)
} else {
format!("{}'", self.fixed_data.name(i - witness_count))
}
}
}

View File

@@ -3,7 +3,7 @@ use crate::analyzer::Expression;
use super::FixedData;
pub trait WitnessColumnNamer {
fn name(&self, i: usize) -> &str;
fn name(&self, i: usize) -> String;
}
/// @returns true if the expression contains a reference to a next value of a witness column.