From e55284b3ea0d028fcf684883522be435c450e49d Mon Sep 17 00:00:00 2001
From: Arthur Meyre
Date: Thu, 29 Jul 2021 15:49:04 +0200
Subject: [PATCH] feat(bounds): add a way to evaluate an operator graph on a dataset

---
 hdk/common/bounds_measurement/__init__.py     |   2 +
 hdk/common/bounds_measurement/dataset_eval.py |  50 +++++++++
 .../bounds_measurement/test_dataset_eval.py   | 101 ++++++++++++++++++
 3 files changed, 153 insertions(+)
 create mode 100644 hdk/common/bounds_measurement/__init__.py
 create mode 100644 hdk/common/bounds_measurement/dataset_eval.py
 create mode 100644 tests/common/bounds_measurement/test_dataset_eval.py

diff --git a/hdk/common/bounds_measurement/__init__.py b/hdk/common/bounds_measurement/__init__.py
new file mode 100644
index 000000000..00836be57
--- /dev/null
+++ b/hdk/common/bounds_measurement/__init__.py
@@ -0,0 +1,2 @@
+"""Bounds measurement module"""
+from . import dataset_eval
diff --git a/hdk/common/bounds_measurement/dataset_eval.py b/hdk/common/bounds_measurement/dataset_eval.py
new file mode 100644
index 000000000..d30dda021
--- /dev/null
+++ b/hdk/common/bounds_measurement/dataset_eval.py
@@ -0,0 +1,50 @@
+"""Code to evaluate the IR graph on datasets"""
+
+from typing import Iterator
+
+from ..operator_graph import OPGraph
+
+
+def eval_op_graph_bounds_on_dataset(op_graph: OPGraph, data_generator: Iterator):
+    """Evaluate the bounds for all output values of the operators in the graph op_graph over data
+    coming from the data_generator
+
+    Every sample is turned into the input dict given to op_graph.evaluate by mapping the position
+    of each value in the sample to that value.
+
+    Args:
+        op_graph (OPGraph): The graph for which we want to determine the bounds
+        data_generator (Iterator): The dataset over which op_graph is evaluated, any iterable
+            yielding one sequence of input values per sample is accepted
+
+    Returns:
+        Dict: dict containing the bounds for each node from op_graph, stored with the node as key
+            and a dict with keys "min" and "max" as value
+
+    Raises:
+        ValueError: if data_generator does not yield any sample
+    """
+    # iter() hands iterators back unchanged and lets plain iterables (e.g. a list) be used too
+    samples = iter(data_generator)
+
+    try:
+        first_sample = next(samples)
+    except StopIteration:
+        # Do not leak StopIteration: inside a calling generator it would become a RuntimeError
+        raise ValueError("data_generator must yield at least one sample") from None
+
+    # The first sample initializes both bounds of every node
+    first_output = op_graph.evaluate(dict(enumerate(first_sample)))
+    node_bounds = {
+        node: {"min": first_output[node], "max": first_output[node]}
+        for node in op_graph.graph.nodes()
+    }
+
+    # The remaining samples can only widen the bounds
+    for input_data in samples:
+        current_output = op_graph.evaluate(dict(enumerate(input_data)))
+        for node, value in current_output.items():
+            node_bounds[node]["min"] = min(node_bounds[node]["min"], value)
+            node_bounds[node]["max"] = max(node_bounds[node]["max"], value)
+
+    return node_bounds
diff --git a/tests/common/bounds_measurement/test_dataset_eval.py b/tests/common/bounds_measurement/test_dataset_eval.py
new file mode 100644
index 000000000..7c82bf6e7
--- /dev/null
+++ b/tests/common/bounds_measurement/test_dataset_eval.py
@@ -0,0 +1,101 @@
+"""Test file for bounds evaluation with a dataset"""
+
+import pytest
+
+from hdk.common.bounds_measurement.dataset_eval import eval_op_graph_bounds_on_dataset
+from hdk.common.data_types.integers import Integer
+from hdk.common.data_types.values import EncryptedValue
+from hdk.hnumpy.tracing import trace_numpy_function
+
+
+@pytest.mark.parametrize(
+    "function,input_ranges,expected_output_bounds",
+    [
+        pytest.param(
+            lambda x, y: x + y,
+            ((-10, 10), (-10, 10)),
+            (-20, 20),
+            id="x + y, (-10, 10), (-10, 10), (-20, 20)",
+        ),
+        pytest.param(
+            lambda x, y: x + y,
+            ((-10, 2), (-4, 5)),
+            (-14, 7),
+            id="x + y, (-10, 2), (-4, 5), (-14, 7)",
+        ),
+        pytest.param(
+            lambda x, y: x - y,
+            ((-10, 10), (-10, 10)),
+            (-20, 20),
+            id="x - y, (-10, 10), (-10, 10), (-20, 20)",
+        ),
+        pytest.param(
+            lambda x, y: x - y,
+            ((-10, 2), (-4, 5)),
+            (-15, 6),
+            id="x - y, (-10, 2), (-4, 5), (-15, 6)",
+        ),
+        pytest.param(
+            lambda x, y: x * y,
+            ((-10, 10), (-10, 10)),
+            (-100, 100),
+            id="x * y, (-10, 10), (-10, 10), (-100, 100)",
+        ),
+        pytest.param(
+            lambda x, y: x * y,
+            ((-10, 2), (-4, 5)),
+            (-50, 40),
+            id="x * y, (-10, 2), (-4, 5), (-50, 40)",
+        ),
+        pytest.param(
+            lambda x, y: x + x + y,
+            ((-10, 10), (-10, 10)),
+            (-30, 30),
+            id="x + x + y, (-10, 10), (-10, 10), (-30, 30)",
+        ),
+        pytest.param(
+            lambda x, y: x - x + y,
+            ((-10, 10), (-10, 10)),
+            (-10, 10),
+            id="x - x + y, (-10, 10), (-10, 10), (-10, 10)",
+        ),
+        pytest.param(
+            lambda x, y: x - x + y,
+            ((-10, 2), (-4, 5)),
+            (-4, 5),
+            id="x - x + y, (-10, 2), (-4, 5), (-4, 5)",
+        ),
+        pytest.param(
+            lambda x, y: x * y - x,
+            ((-10, 10), (-10, 10)),
+            (-110, 110),
+            id="x * y - x, (-10, 10), (-10, 10), (-110, 110)",
+        ),
+        pytest.param(
+            lambda x, y: x * y - x,
+            ((-10, 2), (-4, 5)),
+            (-40, 50),
+            id="x * y - x, (-10, 2), (-4, 5), (-40, 50)",
+        ),
+    ],
+)
+def test_eval_op_graph_bounds_on_dataset(function, input_ranges, expected_output_bounds):
+    """Test function for eval_op_graph_bounds_on_dataset"""
+
+    op_graph = trace_numpy_function(
+        function, {"x": EncryptedValue(Integer(64, True)), "y": EncryptedValue(Integer(64, True))}
+    )
+
+    def data_gen(range_x, range_y):
+        for x_gen in range_x:
+            for y_gen in range_y:
+                yield (x_gen, y_gen)
+
+    node_bounds = eval_op_graph_bounds_on_dataset(
+        op_graph, data_gen(*tuple(map(lambda x: range(x[0], x[1] + 1), input_ranges)))
+    )
+
+    output_node = op_graph.output_nodes[0]
+    output_node_bounds = node_bounds[output_node]
+
+    assert (output_node_bounds["min"], output_node_bounds["max"]) == expected_output_bounds