From 35011bd4031ee58269c435f8d972fd7b9c5dbca1 Mon Sep 17 00:00:00 2001
From: Arthur Meyre <arthur.meyre@zama.ai>
Date: Mon, 20 Sep 2021 10:27:23 +0200
Subject: [PATCH] build: test codeblocks in CI

---
 .github/workflows/continuous-integration.yaml |  11 +-
 Makefile                                      |   6 +
 docs/dev/explanation/COMPILATION.md           |   2 +-
 docs/dev/explanation/FLOAT-FUSING.md          |   3 +
 docs/user/howto/COMPILING_AND_EXECUTING.md    |  21 ++-
 docs/user/tutorial/ARITHMETIC_OPERATIONS.md   |  23 +++
 docs/user/tutorial/COMPILATION_ARTIFACTS.md   |   3 +-
 docs/user/tutorial/TABLE_LOOKUP.md            |   5 +
 .../tutorial/WORKING_WITH_FLOATING_POINTS.md  |   2 +
 script/make_utils/test_md_python_code.py      | 140 ++++++++++++++++++
 10 files changed, 204 insertions(+), 12 deletions(-)
 create mode 100644 script/make_utils/test_md_python_code.py

diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml
index 1f908762e..bc1cbe91d 100644
--- a/.github/workflows/continuous-integration.yaml
+++ b/.github/workflows/continuous-integration.yaml
@@ -164,7 +164,7 @@ jobs:
         with:
           name: html-docs
           path: docs/_build/html
-      - name: PyTest
+      - name: PyTest Source Code
         id: pytest
         if: ${{ steps.conformance.outcome == 'success' && !cancelled() }}
         env:
@@ -172,7 +172,14 @@ jobs:
           LD_PRELOAD: /compiler/build/lib/Runtime/libZamalangRuntime.so
         run: |
           make pytest
-      - name: Notebooks
+      - name: Test CodeBlocks
+        if: ${{ steps.conformance.outcome == 'success' && !cancelled() }}
+        env:
+          # TODO: remove this when JIT doesn't need this
+          LD_PRELOAD: /compiler/build/lib/Runtime/libZamalangRuntime.so
+        run: |
+          make test_codeblocks
+      - name: PyTest Notebooks
         if: ${{ github.event_name == 'schedule' && steps.conformance.outcome == 'success' && !cancelled() }}
         env:
           # TODO: remove this when JIT doesn't need this
diff --git a/Makefile b/Makefile
index 06c70be81..1202ad180 100644
--- a/Makefile
+++ b/Makefile
@@ -197,3 +197,9 @@ release_docker:
 upgrade_py_deps:
 	./script/make_utils/upgrade_deps.sh
 .PHONY: upgrade_py_deps
+
+# This is done by hand as pytest-codeblocks was failing with our native extensions.
+# See refused PR on the project here: https://github.com/nschloe/pytest-codeblocks/pull/58
+test_codeblocks:
+	poetry run python ./script/make_utils/test_md_python_code.py --md_dir docs/
+.PHONY: test_codeblocks
diff --git a/docs/dev/explanation/COMPILATION.md b/docs/dev/explanation/COMPILATION.md
index 253edd7e8..cf6f08f8e 100644
--- a/docs/dev/explanation/COMPILATION.md
+++ b/docs/dev/explanation/COMPILATION.md
@@ -28,7 +28,7 @@ engine = hnp.compile_numpy_function(
 )
 
 # Make homomorphic inference
-engine.run([1, 0])
+engine.run(1, 0)
 ```
 
 ## Overview
diff --git a/docs/dev/explanation/FLOAT-FUSING.md b/docs/dev/explanation/FLOAT-FUSING.md
index c283246a4..ac10088d8 100644
--- a/docs/dev/explanation/FLOAT-FUSING.md
+++ b/docs/dev/explanation/FLOAT-FUSING.md
@@ -6,6 +6,7 @@ The current compiler stack only supports integers with 7 bits or less. But it's
 
 We added fusing floating point operations to make tracing numpy functions somewhat user friendly to allow in-line quantization in the numpy code e.g.:
 
+<!--python-test:skip-->
 ```python
 import numpy
 
@@ -42,6 +43,7 @@ From the terminal node, we go back up through the nodes until we find nodes that
 
 An example of a non fusable computation with that technique is:
 
+<!--python-test:skip-->
 ```python
 import numpy
 
@@ -63,6 +65,7 @@ Firstly, it does not cover optimizing the graph, so you can end up with multiple
 
 Secondly, the current approach fails to handle some programs that in practice could be compiled. The following example could be covered by pushing the search to find a single integer input:
 
+<!--python-test:skip-->
 ```python
 def theoretically_fusable(x):
     x_1 = x + 1.5
diff --git a/docs/user/howto/COMPILING_AND_EXECUTING.md b/docs/user/howto/COMPILING_AND_EXECUTING.md
index 21866991a..6c5809779 100644
--- a/docs/user/howto/COMPILING_AND_EXECUTING.md
+++ b/docs/user/howto/COMPILING_AND_EXECUTING.md
@@ -12,6 +12,7 @@ import concrete.numpy as hnp
 
 You need to have a python function that follows the [limits](../explanation/FHE_AND_FRAMEWORK_LIMITS.md) of the **Concrete Framework**. Here is a simple example:
 
+<!--python-test:cont-->
 ```python
 def f(x, y):
     return x + y
@@ -21,6 +22,7 @@ def f(x, y):
 
 To compile the function, you need to provide what are the inputs that it's expecting. In the example function above, `x` and `y` could be scalars or tensors (though, for now, only dot between tensors are supported), they can be encrypted or clear, they can be signed or unsigned, they can have different bit-widths. So, we need to know what they are beforehand. We can do that like so:
 
+<!--python-test:cont-->
 ```python
 x = hnp.EncryptedScalar(hnp.UnsignedInteger(3))
 y = hnp.EncryptedScalar(hnp.UnsignedInteger(3))
@@ -30,12 +32,14 @@ In this configuration, both `x` and `y` are 3-bit unsigned integers, so they hav
 
 We also need an inputset. It is to determine the bit-widths of the intermediate results. It should be an iterable yielding tuples in the same order as the inputs of the function to compile. There should be at least 10 inputs in the input set to avoid warnings (except for functions with less than 10 possible inputs). The warning is there because the bigger the input set, the better the bounds will be.
 
+<!--python-test:cont-->
 ```python
 inputset = [(2, 3), (0, 0), (1, 6), (7, 7), (7, 1)]
 ```
 
 Finally, we can compile our function to its homomorphic equivalent.
 
+<!--python-test:cont-->
 ```python
 engine = hnp.compile_numpy_function(
     f, {"x": x, "y": y},
@@ -47,15 +51,16 @@ engine = hnp.compile_numpy_function(
 
 You can use `.run(...)` method of `engine` returned by `hnp.compile_numpy_function(...)` to perform fully homomorphic evaluation. Here are some examples:
 
+<!--python-test:cont-->
 ```python
->>> engine.run(3, 4)
-7
->>> engine.run(1, 2)
-3
->>> engine.run(7, 7)
-14
->>> engine.run(0, 0)
-0
+engine.run(3, 4)
+# 7
+engine.run(1, 2)
+# 3
+engine.run(7, 7)
+# 14
+engine.run(0, 0)
+# 0
 ```
 
 ```{caution}
diff --git a/docs/user/tutorial/ARITHMETIC_OPERATIONS.md b/docs/user/tutorial/ARITHMETIC_OPERATIONS.md
index 32ba1d2ef..14ab6cb94 100644
--- a/docs/user/tutorial/ARITHMETIC_OPERATIONS.md
+++ b/docs/user/tutorial/ARITHMETIC_OPERATIONS.md
@@ -6,6 +6,7 @@ In this tutorial, we are going to go over all arithmetic operations available in
 
 ### Static ClearScalar and EncryptedScalar
 
+<!--python-test:skip-->
 ```python
 def f(x):
     return x + 42
@@ -13,6 +14,7 @@ def f(x):
 
 or
 
+<!--python-test:skip-->
 ```python
 def f(x):
     return 42 + x
@@ -24,6 +26,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(3) == 45
 engine.run(0) == 42
@@ -31,6 +34,7 @@ engine.run(0) == 42
 
 ### Dynamic ClearScalar and EncryptedScalar
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return x + y
@@ -38,6 +42,7 @@ def f(x, y):
 
 or
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return y + x
@@ -45,6 +50,7 @@ def f(x, y):
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(6, 4) == 10
 engine.run(1, 1) == 2
@@ -57,6 +63,7 @@ where
 
 ### EncryptedScalar and EncryptedScalar
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return x + y
@@ -69,6 +76,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(7, 7) == 14
 engine.run(3, 4) == 7
@@ -78,6 +86,7 @@ engine.run(3, 4) == 7
 
 ### Static ClearScalar and EncryptedScalar 
 
+<!--python-test:skip-->
 ```python
 def f(x):
     return 3 - x
@@ -89,6 +98,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(2) == 1
 engine.run(3) == 0
@@ -96,6 +106,7 @@ engine.run(3) == 0
 
 ### Dynamic ClearScalar and EncryptedScalar
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return y - x
@@ -108,6 +119,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(2, 4) == 2
 engine.run(1, 7) == 6
@@ -117,6 +129,7 @@ engine.run(1, 7) == 6
 
 ### Static ClearScalar and EncryptedScalar
 
+<!--python-test:skip-->
 ```python
 def f(x):
     return x * 2
@@ -124,6 +137,7 @@ def f(x):
 
 or
 
+<!--python-test:skip-->
 ```python
 def f(x):
     return 2 * x
@@ -135,6 +149,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(2) == 4
 engine.run(5) == 10
@@ -142,6 +157,7 @@ engine.run(5) == 10
 
 ### Dynamic ClearScalar and EncryptedScalar
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return x * y
@@ -149,6 +165,7 @@ def f(x, y):
 
 or
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return y * x
@@ -161,6 +178,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(2, 3) == 6
 engine.run(1, 7) == 7
@@ -170,6 +188,7 @@ engine.run(1, 7) == 7
 
 ### Dynamic ClearTensor and EncryptedTensor
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return np.dot(x, y)
@@ -177,6 +196,7 @@ def f(x, y):
 
 or
 
+<!--python-test:skip-->
 ```python
 def f(x, y):
     return np.dot(y, x)
@@ -189,6 +209,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run([1, 1], [2, 3]) == 5
 engine.run([2, 3], [2, 3]) == 13
@@ -196,6 +217,7 @@ engine.run([2, 3], [2, 3]) == 13
 
 ## Combining all together
 
+<!--python-test:skip-->
 ```python
 def f(x, y, z):
     return 100 - (2 * (np.dot(x, y) + z))
@@ -209,6 +231,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run([1, 2], [4, 3], 10) == 60
 engine.run([2, 3], [3, 2], 5) == 66
diff --git a/docs/user/tutorial/COMPILATION_ARTIFACTS.md b/docs/user/tutorial/COMPILATION_ARTIFACTS.md
index 2b08811e6..576339e91 100644
--- a/docs/user/tutorial/COMPILATION_ARTIFACTS.md
+++ b/docs/user/tutorial/COMPILATION_ARTIFACTS.md
@@ -6,6 +6,7 @@ In this tutorial, we are going to go over the artifact system, which is designed
 
 In case of compilation failures, artifacts are exported automatically to `.artifacts` directory under the working directory. Let's intentionally create a compilation failure and show what kinds of things are exported.
 
+<!--python-test:skip-->
 ```python
 def f(x):
     return np.sin(x)
@@ -93,7 +94,7 @@ Manual exports are mostly used for visualization. Nonetheless, they can be very
 import concrete.numpy as hnp
 import pathlib
 
-artifacts = hnp.CompilationArtifacts(pathlib.Path("/custom/export/path"))
+artifacts = hnp.CompilationArtifacts(pathlib.Path("/tmp/custom/export/path"))
 hnp.compile_numpy_function(
     lambda x: 100 - (3 * (x + 2)),
     {"x": hnp.EncryptedScalar(hnp.UnsignedInteger(3))},
diff --git a/docs/user/tutorial/TABLE_LOOKUP.md b/docs/user/tutorial/TABLE_LOOKUP.md
index 4943e6c86..d2bb54d9c 100644
--- a/docs/user/tutorial/TABLE_LOOKUP.md
+++ b/docs/user/tutorial/TABLE_LOOKUP.md
@@ -21,6 +21,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(0) == 2
 engine.run(1) == 1
@@ -34,6 +35,7 @@ Direct tables are tedious to prepare by hand. When possible, **concrete** fuses
 
 Here is an example function that results in fused table lookup:
 
+<!--python-test:skip-->
 ```python
 def f(x):
     return 127 - (50 * (np.sin(x) + 1)).astype(np.uint32) # astype is to go back to integer world
@@ -45,6 +47,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(0) == 77
 engine.run(1) == 35
@@ -66,12 +69,14 @@ and after floating point operations are fused, we get the following operation gr
 
 Internally, it uses the following lookup table
 
+<!--python-test:skip-->
 ```python
 table = LookupTable([50, 92, 95, 57, 12, 2, 36, 82])
 ```
 
 which is calculated by:
 
+<!--python-test:skip-->
 ```python
 [(50 * (np.sin(x) + 1)).astype(np.uint32) for x in range(2 ** 3)]
 ```
diff --git a/docs/user/tutorial/WORKING_WITH_FLOATING_POINTS.md b/docs/user/tutorial/WORKING_WITH_FLOATING_POINTS.md
index 60ee51ad0..0fe234602 100644
--- a/docs/user/tutorial/WORKING_WITH_FLOATING_POINTS.md
+++ b/docs/user/tutorial/WORKING_WITH_FLOATING_POINTS.md
@@ -2,6 +2,7 @@
 
 ## An example
 
+<!--python-test:skip-->
 ```python
 def f(x):
     np.fabs(100 * (2 * np.sin(x) * np.cos(x))).astype(np.uint32) # astype is to go back to integer world
@@ -13,6 +14,7 @@ where
 
 results in
 
+<!--python-test:skip-->
 ```python
 engine.run(3) == 27
 engine.run(0) == 0
diff --git a/script/make_utils/test_md_python_code.py b/script/make_utils/test_md_python_code.py
new file mode 100644
index 000000000..71660cc2e
--- /dev/null
+++ b/script/make_utils/test_md_python_code.py
@@ -0,0 +1,140 @@
+"""Helper script to be able to test python code in markdown files."""
+
+import argparse
+import re
+import sys
+import traceback
+from pathlib import Path
+from typing import Dict, List
+
+PYTHON_BLOCK_HINTS = ["py", "python", "python3"]  # fence info strings treated as python
+BLOCK_STARTS = tuple(f"```{hint}" for hint in PYTHON_BLOCK_HINTS)  # prefixes opening a block
+BLOCK_END = "```"  # closing fence, compared to the stripped line
+DIRECTIVE_COMMENT_PATTERN = "<!--python-test:(.*)-->"  # group 1 is the directive name
+SKIP_DIRECTIVE = "skip"  # the next python block is not collected
+CONT_DIRECTIVE = "cont"  # the next python block is appended to an already collected block
+
+
+def get_code_blocks_for_file(md_file: Path) -> Dict[int, List[str]]:
+    """Function to process an md file and test the python code in it.
+
+    Args:
+        md_file (Path): The path to the md file to convert and test.
+
+    Raises:
+        SyntaxError: If EOF is reached before a code block is closed.
+        SyntaxError: If a block is not closed and a new python block is opened.
+
+    Returns:
+        Dict[int, List[str]]: A dict containing the code blocks of the file.
+    """
+    file_content = None
+
+    python_code_blocks: Dict[int, List[str]] = {}
+
+    def get_code_block_container(line_idx):
+        block_idx = line_idx
+        python_code_blocks[block_idx] = []
+        return python_code_blocks[block_idx]
+
+    with open(md_file, encoding="utf-8") as f:
+        file_content = f.readlines()
+
+    file_content_iterator = iter(enumerate(file_content, 1))
+    python_block_continues = False
+    skip_next_python_block = False
+
+    for line_idx, line in file_content_iterator:
+        if line.startswith(BLOCK_STARTS):
+            if skip_next_python_block:
+                skip_next_python_block = False
+                continue
+            if not python_block_continues:
+                current_python_code = get_code_block_container(line_idx)
+            while True:
+                line_idx, line = next(file_content_iterator)
+                if line == "":
+                    # Reached EOF
+                    raise SyntaxError(
+                        "Reached EOF before finding the end of the current python block in "
+                        f"{str(md_file)}"
+                    )
+
+                if line.strip() == BLOCK_END:
+                    break
+
+                if line.startswith(BLOCK_STARTS):
+                    raise SyntaxError(
+                        f"Error at line {line_idx} in file {str(md_file)}, "
+                        "python block was opened before the previous one was "
+                        "closed (missing ``` ?)"
+                    )
+                current_python_code.append(line)
+        else:
+            match = re.match(DIRECTIVE_COMMENT_PATTERN, line)
+            if match is not None:
+                directive = match.group(1)
+                if directive == SKIP_DIRECTIVE:
+                    skip_next_python_block = True
+                elif directive == CONT_DIRECTIVE:
+                    python_block_continues = True
+
+                python_block_continues = python_block_continues and not skip_next_python_block
+
+    return python_code_blocks
+
+
+def main(args):
+    """The actual processing."""
+    md_dir_path = Path(args.md_dir)
+    md_files = sorted(md_dir_path.glob("**/*.md"))
+
+    code_blocks_per_file: Dict[str, Dict[int, List[str]]] = {}
+
+    err_msg = ""
+
+    for md_file in md_files:
+        md_file = md_file.resolve().absolute()
+        md_file_str = str(md_file)
+        # pylint: disable=broad-except
+        try:
+            code_blocks_per_file[md_file_str] = get_code_blocks_for_file(md_file)
+        except Exception:
+            err_msg += f"Error while converting {md_file_str}"
+            err_msg += traceback.format_exc() + "\n"
+        # pylint: enable=broad-except
+
+    for md_file_str, code_blocks in code_blocks_per_file.items():
+        for line_idx, python_code in code_blocks.items():
+            # pylint: disable=broad-except,exec-used
+            try:
+                print(f"Testing block starting line #{line_idx} from {md_file_str}")
+                python_code = "".join(python_code)
+                compiled_code = compile(python_code, filename=md_file_str, mode="exec")
+                exec(compiled_code, {"__MODULE__": "__main__"})
+                print("Success")
+            except Exception:
+                print("Failed")
+                err_msg += (
+                    f"Error while testing block starting line #{line_idx} from {md_file_str}:\n"
+                )
+                err_msg += f"```\n{python_code}```\n"
+                err_msg += traceback.format_exc() + "\n"
+            # pylint: enable=broad-except,exec-used
+
+    if err_msg != "":
+        print(err_msg)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        "Converts md python blocks to python files", allow_abbrev=False
+    )
+    parser.add_argument(
+        "--md_dir", type=str, help="The path to the dir containing md files to convert."
+    )
+
+    cli_args = parser.parse_args()
+
+    main(cli_args)