Mirror of https://github.com/MPCStats/zk-stats-lib.git (synced 2026-01-09 13:38:02 -05:00)
@@ -16,7 +16,7 @@ ERROR_CIRCUIT_STRICT = 0.0001
 ERROR_CIRCUIT_RELAXED = 0.1


-def data_to_file(data_path: Path, data: list[torch.Tensor]) -> dict[str, list]:
+def data_to_json_file(data_path: Path, data: list[torch.Tensor]) -> dict[str, list]:
     column_names = [f"columns_{i}" for i in range(len(data))]
     column_to_data = {
         column: d.tolist()
@@ -45,7 +45,7 @@ def compute(
     data_path = basepath / "data.json"
     data_commitment_path = basepath / "commitments.json"

-    column_to_data = data_to_file(data_path, data)
+    column_to_data = data_to_json_file(data_path, data)
    # If selected_columns_params is None, select all columns
     if selected_columns_params is None:
         selected_columns = list(column_to_data.keys())

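Note on the helper renamed in these hunks: `data_to_json_file` writes each input tensor as a JSON list keyed `columns_0`, `columns_1`, and so on. A minimal sketch of that layout (not part of the diff; the tensor values are illustrative):

```python
import json
from pathlib import Path

import torch

# Illustrative inputs, mirroring the fixtures used in the tests.
column_0 = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
column_1 = torch.tensor([6.0, 7.0, 8.0, 9.0, 10.0])

# data_to_json_file names the columns "columns_{i}" and stores each tensor
# as a plain list, producing e.g.
# {"columns_0": [1.0, ..., 5.0], "columns_1": [6.0, ..., 10.0]}
column_to_data = {f"columns_{i}": d.tolist() for i, d in enumerate([column_0, column_1])}
Path("data.json").write_text(json.dumps(column_to_data))
```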
@@ -2,10 +2,10 @@ import json

 import torch

-from zkstats.core import generate_data_commitment
+from zkstats.core import generate_data_commitment, prover_gen_settings, _preprocess_data_file_to_json, verifier_define_calculation
 from zkstats.computation import computation_to_model

-from .helpers import data_to_file, compute
+from .helpers import data_to_json_file, compute


 def test_get_data_commitment_maps(tmp_path, column_0, column_1, scales):
@@ -16,7 +16,7 @@ def test_get_data_commitment_maps(tmp_path, column_0, column_1, scales):
     #     "columns_0": [1, 2, 3, 4, 5],
     #     "columns_1": [6, 7, 8, 9, 10],
     # }
-    data_json = data_to_file(data_path, [column_0, column_1])
+    data_json = data_to_json_file(data_path, [column_0, column_1])
     # data_commitment is a mapping[scale -> mapping[column_name, commitment_hex]]
     # {
     #     scale_0: {
@@ -51,7 +51,7 @@ def test_get_data_commitment_maps_hardcoded(tmp_path):
     data_commitment_path = tmp_path / "commitments.json"
     column_0 = torch.tensor([3.0, 4.5, 1.0, 2.0, 7.5, 6.4, 5.5])
     column_1 = torch.tensor([2.7, 3.3, 1.1, 2.2, 3.8, 8.2, 4.4])
-    data_to_file(data_path, [column_0, column_1])
+    data_to_json_file(data_path, [column_0, column_1])
     scales = [2, 3]
     generate_data_commitment(data_path, scales, data_commitment_path)
     with open(data_commitment_path, "r") as f:
@@ -63,7 +63,7 @@ def test_get_data_commitment_maps_hardcoded(tmp_path):

 def test_integration_select_partial_columns(tmp_path, column_0, column_1, error, scales):
     data_path = tmp_path / "data.json"
-    data_json = data_to_file(data_path, [column_0, column_1])
+    data_json = data_to_json_file(data_path, [column_0, column_1])
     columns = list(data_json.keys())
     assert len(columns) == 2
     # Select only the first column from two columns
@@ -75,3 +75,83 @@ def test_integration_select_partial_columns(tmp_path, column_0, column_1, error,
     _, model = computation_to_model(simple_computation, error)
     # gen settings, setup, prove, verify
     compute(tmp_path, [column_0, column_1], model, scales, selected_columns)
+
+
+def test_csv_data(tmp_path, column_0, column_1, error, scales):
+    data_json_path = tmp_path / "data.json"
+    data_csv_path = tmp_path / "data.csv"
+    data_json = data_to_json_file(data_json_path, [column_0, column_1])
+    json_file_to_csv(data_json_path, data_csv_path)
+
+    selected_columns = list(data_json.keys())
+
+    def simple_computation(state, x):
+        return state.mean(x[0])
+
+    sel_data_path = tmp_path / "comb_data.json"
+    model_path = tmp_path / "model.onnx"
+    settings_path = tmp_path / "settings.json"
+    data_commitment_path = tmp_path / "commitments.json"
+
+    # Test: `generate_data_commitment` works with csv
+    generate_data_commitment(data_csv_path, scales, data_commitment_path)
+
+    # Test: `prover_gen_settings` works with csv
+    _, model_for_proving = computation_to_model(simple_computation, error)
+    prover_gen_settings(
+        data_path=data_csv_path,
+        selected_columns=selected_columns,
+        sel_data_path=str(sel_data_path),
+        prover_model=model_for_proving,
+        prover_model_path=str(model_path),
+        scale=scales,
+        mode="resources",
+        settings_path=str(settings_path),
+    )
+
+    # Test: `verifier_define_calculation` works with csv
+    # Instantiate the model for verification since the state of `model_for_proving` is changed after `prover_gen_settings`
+    _, model_for_verification = computation_to_model(simple_computation, error)
+    verifier_define_calculation(data_csv_path, selected_columns, str(sel_data_path), model_for_verification, str(model_path))
+
+
+def json_file_to_csv(data_json_path, data_csv_path):
+    with open(data_json_path, "r") as f:
+        data_from_json = json.load(f)
+    # Generate csv file from json
+    column_names = list(data_from_json.keys())
+    len_columns = len(data_from_json[column_names[0]])
+    for column in column_names:
+        assert len(data_from_json[column]) == len_columns, "All columns should have the same length"
+    rows = [
+        [str(data_from_json[column][i]) for column in column_names]
+        for i in range(len_columns)
+    ]
+    with open(data_csv_path, "w") as f:
+        f.write(",".join(column_names) + "\n")
+        for row in rows:
+            f.write(",".join(row) + "\n")
+
+
+def test__preprocess_data_file_to_json(tmp_path, column_0, column_1):
+    data_json_path = tmp_path / "data.json"
+    data_from_json = data_to_json_file(data_json_path, [column_0, column_1])
+
+    # Test: csv can be converted to json
+    # 1. Generate a csv file from json
+    data_csv_path = tmp_path / "data.csv"
+    json_file_to_csv(data_json_path, data_csv_path)
+    # 2. Convert csv to json
+    data_from_csv_json_path = tmp_path / "data_from_csv.json"
+    _preprocess_data_file_to_json(data_csv_path, data_from_csv_json_path)
+    with open(data_from_csv_json_path, "r") as f:
+        data_from_csv = json.load(f)
+    # 3. Compare the two json files
+    assert data_from_csv == data_from_json
+
+    # Test: this function can also handle json format by just copying the file
+    new_data_json_path = tmp_path / "new_data.json"
+    _preprocess_data_file_to_json(data_json_path, new_data_json_path)
+    with open(new_data_json_path, "r") as f:
+        new_data_from_json = json.load(f)
+    assert new_data_from_json == data_from_json

@@ -1,11 +1,15 @@
-from typing import Type, Sequence, Mapping, Union, Literal
-import torch
-import ezkl
+import csv
+from pathlib import Path
+from typing import Type, Sequence, Mapping, Union, Literal, Callable
+from enum import Enum
 import os
 import numpy as np
 import json
 import time

+import torch
+import ezkl
+
 from zkstats.computation import IModel


@@ -40,7 +44,11 @@ def create_dummy(data_path: str, dummy_data_path: str) -> None:
     """
     Create a dummy data file with randomized data based on the shape of the original data.
     """
-    data = json.loads(open(data_path, "r").read())
+    # Convert data file to json under the same directory but with suffix .json
+    data_path: Path = Path(data_path)
+    data_json_path = Path(data_path).with_suffix(DataExtension.JSON.value)
+
+    data = json.loads(open(data_json_path, "r").read())
     # assume all columns have same number of rows
     dummy_data ={}
     for col in data:
@@ -270,16 +278,17 @@ def generate_data_commitment(data_path: str, scales: Sequence[int], data_commitm
     Generate and store data commitment maps for different scales so that verifiers can verify
     proofs with different scales.

-    :param data_path: path to the data file. The data file should be a JSON file with the following format:
-    {
-        "column_0": [number_0, number_1, ...],
-        "column_1": [number_0, number_1, ...],
-    }
+    :param data_path: path to the data file. The format must be one of those defined in `DataExtension`
     :param scales: a list of scales to use for the commitments
     :param data_commitment_path: path to store the generated data commitment maps
     """

-    with open(data_path) as f:
+    # Convert `data_path` to json file `data_json_path`
+    data_path: Path = Path(data_path)
+    data_json_path = Path(data_path).with_suffix(DataExtension.JSON.value)
+    _preprocess_data_file_to_json(data_path, data_json_path)
+
+    with open(data_json_path) as f:
         data_json = json.load(f)
     data_commitments = {
         str(scale): {
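With this change `generate_data_commitment` accepts any format listed in `DataExtension` and converts it to JSON before hashing. A minimal usage sketch (not part of the diff), mirroring `test_csv_data` above; the file names, column names, and scales are illustrative:

```python
from zkstats.core import generate_data_commitment

# A CSV whose header row carries the column names, in the same shape the
# tests' json_file_to_csv helper produces.
with open("data.csv", "w") as f:
    f.write("columns_0,columns_1\n")
    f.write("3.0,2.7\n")
    f.write("4.5,3.3\n")

# The CSV is converted to data.json (same directory, .json suffix) internally,
# then committed at each requested scale.
generate_data_commitment("data.csv", [2, 3], "commitments.json")
# commitments.json maps scale -> {column_name: commitment_hex}, e.g.
# {"2": {"columns_0": "...", "columns_1": "..."}, "3": {...}}
```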
@@ -367,14 +376,62 @@ def _gen_settings(
         print("setting: ", f_setting.read())


+def _csv_file_to_json(old_file_path: Union[Path, str], out_data_json_path: Union[Path, str], *, delimiter: str = ",") -> None:
+    data_csv_path = Path(old_file_path)
+    with open(data_csv_path, 'r') as f_csv:
+        reader = csv.reader(f_csv, delimiter=delimiter, strict=True)
+        # Read all data from the reader to `rows`
+        rows_with_column_name = tuple(reader)
+        if len(rows_with_column_name) < 1:
+            raise ValueError("No column names in the CSV file")
+        if len(rows_with_column_name) < 2:
+            raise ValueError("No data in the CSV file")
+        column_names = rows_with_column_name[0]
+        rows = rows_with_column_name[1:]
+
+    columns = [
+        [
+            float(rows[j][i])
+            for j in range(len(rows))
+        ]
+        for i in range(len(rows[0]))
+    ]
+    data = {
+        column_name: column_data
+        for column_name, column_data in zip(column_names, columns)
+    }
+    with open(out_data_json_path, "w") as f_json:
+        json.dump(data, f_json)
+
+
+class DataExtension(Enum):
+    CSV = ".csv"
+    JSON = ".json"
+
+
+DATA_FORMAT_PREPROCESSING_FUNCTION: dict[DataExtension, Callable[[Union[Path, str], Path], None]] = {
+    DataExtension.CSV: _csv_file_to_json,
+    DataExtension.JSON: lambda old_file_path, out_data_json_path: Path(out_data_json_path).write_text(Path(old_file_path).read_text())
+}
+
+def _preprocess_data_file_to_json(data_path: Union[Path, str], out_data_json_path: Path):
+    data_file_extension = DataExtension(data_path.suffix)
+    preprocess_function = DATA_FORMAT_PREPROCESSING_FUNCTION[data_file_extension]
+    preprocess_function(data_path, out_data_json_path)
+
+
 def _process_data(
-        data_path: str,
+        data_path: Union[str, Path],
         col_array: list[str],
         sel_data_path: list[str],
     ) -> list[torch.Tensor]:
     data_tensor_array=[]
     sel_data = []
-    data_onefile = json.loads(open(data_path, "r").read())
+    data_path: Path = Path(data_path)
+    # Convert data file to json under the same directory but with suffix .json
+    data_json_path = Path(data_path).with_suffix(DataExtension.JSON.value)
+    _preprocess_data_file_to_json(data_path, data_json_path)
+    data_onefile = json.loads(open(data_json_path, "r").read())

     for col in col_array:
         data = data_onefile[col]
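To make the dispatch above concrete, a small sketch (not part of the diff) of what `_preprocess_data_file_to_json` does with a CSV input; the file names are illustrative:

```python
from pathlib import Path

from zkstats.core import _preprocess_data_file_to_json

# A CSV with a header row of column names followed by numeric rows.
csv_path = Path("data.csv")
csv_path.write_text("columns_0,columns_1\n3.0,2.7\n4.5,3.3\n")

# The ".csv" suffix dispatches to _csv_file_to_json, which parses every cell
# as a float and writes one list per column:
# {"columns_0": [3.0, 4.5], "columns_1": [2.7, 3.3]}
_preprocess_data_file_to_json(csv_path, Path("data.json"))

# A ".json" input would instead be copied verbatim to the output path.
```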
@@ -394,4 +451,4 @@ def _get_commitment_for_column(column: list[float], scale: int) -> str:
     res_poseidon_hash = ezkl.poseidon_hash(serialized_data)[0]
     # res_hex = ezkl.vecu64_to_felt(res_poseidon_hash[0])

-    return res_poseidon_hash
+    return res_poseidon_hash