In [1]:
pip install -r ../../requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import ezkl
import torch
from torch import nn
import json
import os
import time
import scipy
import numpy as np
import matplotlib.pyplot as plt
import statistics
import math

In [3]:
%run -i ../../zkstats/core.py

In [4]:
# init path
os.makedirs(os.path.dirname('shared/'), exist_ok=True)
os.makedirs(os.path.dirname('prover/'), exist_ok=True)
verifier_model_path = os.path.join('shared/verifier.onnx')
prover_model_path = os.path.join('prover/prover.onnx')
verifier_compiled_model_path = os.path.join('shared/verifier.compiled')
prover_compiled_model_path = os.path.join('prover/prover.compiled')
pk_path = os.path.join('shared/test.pk')
vk_path = os.path.join('shared/test.vk')
proof_path = os.path.join('shared/test.pf')
settings_path = os.path.join('shared/settings.json')
srs_path = os.path.join('shared/kzg.srs')
witness_path = os.path.join('prover/witness.json')
# this is private to prover since it contains actual data
sel_data_path = os.path.join('prover/sel_data.json')
# this is just dummy random value
sel_dummy_data_path = os.path.join('shared/sel_dummy_data.json')

=======================  ZK-STATS FLOW =======================

Here is a weird example where different col1 and col2 of data.json have different row number. We just want to show that different dimension is possible. In fact, a person can just request median(col_1), and median(col_2). Then just compute mean on his own as well, but here we show that the code is composable enough to do all at once. 

In [5]:

data_path = os.path.join('data.json')
dummy_data_path = os.path.join('shared/dummy_data.json')

data = json.loads(open(data_path, "r").read())
data1 = data['col_1']
data2 = data['col_2']


create_dummy(data_path, dummy_data_path)
dummy_data = json.loads(open(dummy_data_path, "r").read())
dummy_data1 = dummy_data['col_1']
dummy_data2 = dummy_data['col_2']


dummy_theory_output_median1 = torch.tensor(np.median(dummy_data1))
dummy_lower_to_median1 = torch.tensor(np.sort(dummy_data1)[int(len(dummy_data1)/2)-1])
dummy_upper_to_median1 = torch.tensor(np.sort(dummy_data1)[int(len(dummy_data1)/2)])

dummy_theory_output_median2 = torch.tensor(np.median(dummy_data2))
dummy_lower_to_median2 = torch.tensor(np.sort(dummy_data2)[int(len(dummy_data2)/2)-1])
dummy_upper_to_median2 = torch.tensor(np.sort(dummy_data2)[int(len(dummy_data2)/2)])

dummy_theory_output_mean = torch.mean(torch.tensor([dummy_theory_output_median1, dummy_theory_output_median2]))

theory_output_median1 = torch.tensor(np.median(data1))
lower_to_median1 = torch.tensor(np.sort(data1)[int(len(data1)/2)-1])
upper_to_median1 = torch.tensor(np.sort(data1)[int(len(data1)/2)])

theory_output_median2 = torch.tensor(np.median(data2))
lower_to_median2 = torch.tensor(np.sort(data2)[int(len(data2)/2)-1])
upper_to_median2 = torch.tensor(np.sort(data2)[int(len(data2)/2)])

theory_output_mean = torch.mean(torch.tensor([theory_output_median1, theory_output_median2]))





In [6]:
def median(X, median, lower, upper):
    # since within 1%, we regard as same value
    count_less = torch.sum((X < 0.99*median).double())
    count_equal = torch.sum((torch.abs(X-median)<=torch.abs(0.01*median)).double())
    len = X.size()[1]
    half_len = torch.floor(torch.div(len, 2))

    # not support modulo yet
    less_cons = count_less<half_len+2*(len/2 - torch.floor(len/2))
    more_cons = count_less+count_equal>half_len

    # For count_equal == 0
    lower_exist = torch.sum((torch.abs(X-lower)<=torch.abs(0.01*lower)).double())>0
    lower_cons = torch.sum((X>1.01*lower).double())==half_len
    upper_exist = torch.sum((torch.abs(X-upper)<=torch.abs(0.01*upper)).double())>0
    upper_cons = torch.sum((X<0.99*upper).double())==half_len
    bound = 2*count_less==2*half_len
    # 0.02 since 2*0.01
    bound_avg = (torch.abs(lower+upper-2*median)<=torch.abs(0.02*median))

    median_in_cons = torch.logical_and(less_cons, more_cons)
    median_out_cons = torch.logical_and(torch.logical_and(bound, bound_avg), torch.logical_and(torch.logical_and(lower_cons, upper_cons), torch.logical_and(lower_exist, upper_exist)))

    return(torch.where(count_equal==0, median_out_cons, median_in_cons), median)


In [7]:
def mean(X, mean):
    return (torch.abs(torch.sum(X)-X.size()[1]*(mean))<=torch.abs(0.01*X.size()[1]*mean), mean)

In [8]:
scales = [8]
selected_columns = ['col_1', 'col_2']
commitment_maps = get_data_commitment_maps(data_path, scales)

In [13]:
print("dummy output: ", dummy_theory_output_mean)
# Verifier/ data consumer side: send desired calculation
class verifier_model(nn.Module):
    def __init__(self):
        super(verifier_model, self).__init__()
        # w represents mean in this case
        self.median1 = nn.Parameter(data = dummy_theory_output_median1, requires_grad = False)
        self.lower1 = nn.Parameter(data = dummy_lower_to_median1, requires_grad = False)
        self.upper1 = nn.Parameter(data = dummy_upper_to_median1, requires_grad = False)
        self.median2 = nn.Parameter(data = dummy_theory_output_median2, requires_grad = False)
        self.lower2 = nn.Parameter(data = dummy_lower_to_median2, requires_grad = False)
        self.upper2 = nn.Parameter(data = dummy_upper_to_median2, requires_grad = False)
        self.mean = nn.Parameter(data = dummy_theory_output_mean, requires_grad = False)
    def forward(self,X1, X2):
        bool1, median1 = median(X1, self.median1, self.lower1, self.upper1)
        bool2, median2 = median(X2, self.median2, self.lower2, self.upper2)
        bool3, output_mean = mean(torch.tensor([median1, median2]).reshape(1,-1,1), self.mean)
        return (torch.logical_and(torch.logical_and(bool1, bool2),bool3), output_mean )



verifier_define_calculation(dummy_data_path, selected_columns,sel_dummy_data_path,verifier_model, verifier_model_path)

dummy output:  tensor(14.7750, dtype=torch.float64)


  bool3, output_mean = mean(torch.tensor([median1, median2]).reshape(1,-1,1), self.mean)
  bool3, output_mean = mean(torch.tensor([median1, median2]).reshape(1,-1,1), self.mean)


In [14]:
# prover calculates settings, send to verifier
print("theory mean output: ", theory_output_mean)
print("median 1: ", theory_output_median1)

class prover_model(nn.Module):
    def __init__(self):
        super(prover_model, self).__init__()
        # w represents mean in this case
        self.median1 = nn.Parameter(data = theory_output_median1, requires_grad = False)
        self.lower1 = nn.Parameter(data = lower_to_median1, requires_grad = False)
        self.upper1 = nn.Parameter(data = upper_to_median1, requires_grad = False)
        self.median2 = nn.Parameter(data = theory_output_median2, requires_grad = False)
        self.lower2 = nn.Parameter(data = lower_to_median2, requires_grad = False)
        self.upper2 = nn.Parameter(data = upper_to_median2, requires_grad = False)
        self.mean = nn.Parameter(data = theory_output_mean, requires_grad = False)
    def forward(self,X1, X2):
        bool1, median1 = median(X1, self.median1, self.lower1, self.upper1)
        bool2, median2 = median(X2, self.median2, self.lower2, self.upper2)
        bool3, output_mean = mean(torch.tensor([median1, median2]).reshape(1,-1,1), self.mean)
        return (torch.logical_and(torch.logical_and(bool1, bool2),bool3), output_mean )



prover_gen_settings(data_path,selected_columns, sel_data_path, prover_model,prover_model_path, scales, "resources", settings_path)

theory mean output:  tensor(49.3500, dtype=torch.float64)
median 1:  tensor(49.5500, dtype=torch.float64)


  bool3, output_mean = mean(torch.tensor([median1, median2]).reshape(1,-1,1), self.mean)
  bool3, output_mean = mean(torch.tensor([median1, median2]).reshape(1,-1,1), self.mean)


==== Generate & Calibrate Setting ====
scale:  [8]
setting:  {"run_args":{"tolerance":{"val":0.0,"scale":1.0},"input_scale":8,"param_scale":8,"scale_rebase_multiplier":10,"lookup_range":[-25518,25754],"logrows":16,"num_inner_cols":2,"variables":[["batch_size",1]],"input_visibility":{"Hashed":{"hash_is_public":true,"outlets":[]}},"output_visibility":"Public","param_visibility":"Private"},"num_rows":20992,"total_assignments":16104,"total_const_size":2430,"model_instance_shapes":[[1],[1]],"model_output_scales":[0,8],"model_input_scales":[8,8],"module_sizes":{"kzg":[],"poseidon":[20992,[2]],"elgamal":[0,[0]]},"required_lookups":["Abs",{"GreaterThan":{"a":0.0}},"KroneckerDelta"],"check_mode":"UNSAFE","version":"7.0.0","num_blinding_factors":null}


In [15]:
# Here verifier & prover can concurrently call setup since all params are public to get pk.
# Here write as verifier function to emphasize that verifier must calculate its own vk to be sure
setup(verifier_model_path, verifier_compiled_model_path, settings_path,vk_path, pk_path )

print("=======================================")
# Prover generates proof
print("Theory output: ", theory_output_mean)
prover_gen_proof(prover_model_path, sel_data_path, witness_path, prover_compiled_model_path, settings_path, proof_path, pk_path)

spawning module 0
spawning module 2
spawning module 0


==== setting up ezkl ====


spawning module 2


Time setup: 7.375061988830566 seconds
Theory output:  tensor(49.3500, dtype=torch.float64)
==== Generating Witness ====
witness boolean:  1.0
witness result 1 : 49.3515625
==== Generating Proof ====


spawning module 0
spawning module 2


proof:  {'instances': [[[3042937791208075219, 8157070662846698822, 3804781648660056856, 172406108020799675], [15295097400487804665, 12861486368330479023, 3350118022201779210, 343142782800691716], [12436184717236109307, 3962172157175319849, 7381016538464732718, 1011752739694698287], [10870267098303494893, 1752989342377741058, 8860763459400202009, 2635465469930673149]]], 'proof': '0a2ee65fcbed7baeb871d532a9e0e15e0c326f0a9abe5ad62bc6e30ce775491f27d93b0c383439cb780cde03407fdb37df0dc05b2b6b819849e8856656a88d331a600e7cec11e10a3f5590d1820d2274310ff340e1d8c5a9883034ba654884db11d7bcc45f60ee10ec6a6b1d0bf68ad5389ddb02af0e426bbf1cfec4923aa4d32a4f7c6c8e2944258eb4c0a964fb364f163958c86f3759e2a71a4ec2ef62516c2bfaf8f80f93d60e0f5545df395e7f891d486fab3490c47d1cfee1699fce4446265697461c2fd84596312dbd69d8b070fb2dffeda829531ab6585ef927f0a16f1682ee2e9a793237f5392981563a2a936473749df1854c97c5c4964ae0388b1003f07c0de3ecef7ef0b1d61c9168a9ac15274b3c0f6b0b4eeee45c175a9dc6862432ccb1f71283832d607c3b39fed958a22ada4810

In [16]:
# Verifier verifies
verifier_verify(proof_path, settings_path, vk_path, selected_columns, commitment_maps)

49.3515625