chore(common): Merge branch 'main' into release/2.7.x

Author: Bourgerie Quentin
Date: 2024-08-13 14:12:19 +02:00
252 changed files with 9166 additions and 6430 deletions

View File

@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: Check first line
uses: gsactions/commit-message-checker@v1
uses: gsactions/commit-message-checker@16fa2d5de096ae0d35626443bcd24f1e756cafee # v2.0.0
with:
pattern: '^(feat|fix|test|bench|docs|chore|refactor|perf)\((compiler|backend|frontend|optimizer|tools|ci|common).*\): '
flags: 'gs'
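For illustration, a minimal sketch of what this first-line check accepts and rejects, assuming the Rust `regex` crate (the action itself applies the same pattern with the `gs` flags shown above):

```rust
use regex::Regex;

fn main() {
    // Same pattern as in the workflow above.
    let pattern = Regex::new(
        r"^(feat|fix|test|bench|docs|chore|refactor|perf)\((compiler|backend|frontend|optimizer|tools|ci|common).*\): ",
    )
    .unwrap();

    // First line of this very commit: accepted.
    assert!(pattern.is_match("chore(common): Merge branch 'main' into release/2.7.x"));
    // Missing scope: rejected.
    assert!(!pattern.is_match("fix: a typo"));
}
```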

View File

@@ -47,13 +47,8 @@ jobs:
run: |
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
# SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- name: Fetch submodules
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
submodules: recursive
@@ -100,7 +95,7 @@ jobs:
make ${{ env.BENCHMARK_TARGET }}
- name: Upload raw results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: compiler_${{ github.sha }}_raw
path: compilers/concrete-compiler/compiler/benchmarks_results.json
@@ -120,13 +115,13 @@ jobs:
--throughput
- name: Upload parsed results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: compiler_${{ github.sha }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: zama-ai/slab
path: slab

View File

@@ -45,12 +45,6 @@ jobs:
echo "Request ID: ${{ inputs.request_id }}"
echo "Matrix item: ${{ inputs.matrix_item }}"
# A SSH private key is required as some dependencies are from private repos
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- name: Set up env
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -58,7 +52,7 @@ jobs:
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
- name: Fetch repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
@@ -70,7 +64,7 @@ jobs:
run: mkdir build
- name: Build compiler
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler
with:
registry: ghcr.io
@@ -86,16 +80,16 @@ jobs:
${{ env.DOCKER_GPU_OPTION }}
shell: bash
run: |
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
set -e
cd /concrete/compilers/concrete-compiler/compiler
rm -rf /build/*
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build all build-end-to-end-dataflow-tests
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build all
echo "Debug: ccache statistics (after the build):"
ccache -s
- name: Build compiler Dialects docs and check diff
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-docs
with:
registry: ghcr.io
@@ -127,7 +121,7 @@ jobs:
run: echo "MINIMAL_TESTS=ON" >> $GITHUB_ENV
- name: Test compiler
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
with:
registry: ghcr.io
image: ${{ env.DOCKER_IMAGE_TEST }}
@@ -140,13 +134,13 @@ jobs:
shell: bash
run: |
set -e
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
cd /concrete/compilers/concrete-compiler/compiler
pip install pytest
dnf install -y libzstd libzstd-devel
sed "s/pytest/python -m pytest/g" -i Makefile
mkdir -p /tmp/concrete_compiler/gpu_tests/
make MINIMAL_TESTS=${{ env.MINIMAL_TESTS }} DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build run-tests run-end-to-end-dataflow-tests
make MINIMAL_TESTS=${{ env.MINIMAL_TESTS }} DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build run-tests
chmod -R ugo+rwx /tmp/KeySetCache
- name: Analyze logs
@@ -155,7 +149,7 @@ jobs:
ls -1 | xargs grep -H "WARNING RETRY" | sed -e "s/.log.*//g" | uniq -c | sed -re "s/ *([0-9]*) (.*)/::warning ::Test \2 retried \1 times/g" | cat
# - name: Archive python package
# uses: actions/upload-artifact@v3
# uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
# with:
# name: concrete-compiler.whl
# path: build/wheels/concrete_compiler-*-manylinux_{{ env.GLIB_VER }}_x86_64.whl

View File

@@ -47,7 +47,7 @@ jobs:
docker system prune -af
- name: Fetch repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
submodules: recursive
@@ -74,13 +74,14 @@ jobs:
cd compilers/concrete-compiler/compiler
rm -rf /shared/build
make HPX_DIR=/shared/hpx install-hpx-from-source
make HPX_DIR=/shared/hpx BUILD_DIR=/shared/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=ON BINDINGS_PYTHON_ENABLED=OFF CUDA_SUPPORT=${{ env.CUDA_SUPPORT }} build-end-to-end-dataflow-tests
make HPX_DIR=/shared/hpx BUILD_DIR=/shared/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=ON BINDINGS_PYTHON_ENABLED=OFF CUDA_SUPPORT=${{ env.CUDA_SUPPORT }} build-end-to-end-tests
- name: Run end-to-end benchmarks
run: |
set -e
cd compilers/concrete-compiler/compiler
make BUILD_DIR=/shared/build run-end-to-end-distributed-tests
rm -rf /shared/KeyCache
make BUILD_DIR=/shared/build KEY_CACHE_DIRECTORY=/shared/KeyCache run-end-to-end-distributed-tests
- name: Instance cleanup
run: |

View File

@@ -45,12 +45,6 @@ jobs:
echo "Request ID: ${{ inputs.request_id }}"
echo "Matrix item: ${{ inputs.matrix_item }}"
# A SSH private key is required as some dependencies are from private repos
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- name: Set up env
# "Install rust" step require root user to have a HOME directory which is not set.
run: |
@@ -58,7 +52,7 @@ jobs:
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
- name: Fetch repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
@@ -70,7 +64,7 @@ jobs:
run: mkdir build
- name: Build and test compiler
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler
with:
registry: ghcr.io
@@ -86,7 +80,7 @@ jobs:
--gpus all
shell: bash
run: |
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
set -e
cd /concrete/compilers/concrete-compiler/compiler
rm -rf /build/*

View File

@@ -8,7 +8,7 @@ jobs:
FormattingAndLinting:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Format with clang-format (Cpp)
run: |
sudo apt install moreutils
@@ -34,6 +34,6 @@ jobs:
CheckLicense:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Check if sources include the license header
run: .github/workflows/scripts/check_for_license.sh

View File

@@ -24,12 +24,7 @@ jobs:
runson: ["aws-mac1-metal", "aws-mac2-metal"]
runs-on: ${{ matrix.runson }}
steps:
# A SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
@@ -45,7 +40,7 @@ jobs:
- name: Cache compilation (push)
if: github.event_name == 'push'
uses: actions/cache@v3
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: /Users/runner/Library/Caches/ccache
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.sha }}
@@ -54,7 +49,7 @@ jobs:
- name: Cache compilation (pull_request)
if: github.event_name == 'pull_request'
uses: actions/cache@v3
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: /Users/runner/Library/Caches/ccache
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.event.pull_request.base.sha }}

View File

@@ -49,12 +49,8 @@ jobs:
echo "AMI: ${{ inputs.instance_image_id }}"
echo "Type: ${{ inputs.instance_type }}"
echo "Request ID: ${{ inputs.request_id }}"
# SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
@@ -66,7 +62,6 @@ jobs:
- name: Build Image
run: |
DOCKER_BUILDKIT=1 docker build --no-cache \
--ssh default=${{ env.SSH_AUTH_SOCK }} \
--label "commit-sha=${{ github.sha }}" -t ${{ matrix.image }} -f ${{ matrix.dockerfile }} .
- name: Tag and Publish Image
@@ -88,7 +83,7 @@ jobs:
IMAGE: ghcr.io/zama-ai/hpx
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
@@ -98,7 +93,7 @@ jobs:
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c # v44.5.24
- name: Login
id: login
@@ -127,7 +122,7 @@ jobs:
dockerfile: docker/Dockerfile.cuda-118-env
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
@@ -137,7 +132,7 @@ jobs:
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c # v44.5.24
- name: Login
id: login

View File

@@ -16,17 +16,17 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Rust install
uses: actions-rs/toolchain@v1
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:
toolchain: nightly-2024-01-31
toolchain: nightly-2024-07-01
override: true
components: rustfmt, clippy
- name: Download cargo cache
uses: Swatinem/rust-cache@v2
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3
- name: Formatting
run: |
@@ -63,13 +63,13 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Setup rust toolchain for concrete-cpu
uses: ./.github/workflows/setup_rust_toolchain_for_concrete_cpu
- name: Download cargo cache
uses: Swatinem/rust-cache@v2
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3
- name: Tests
run: |
@@ -81,13 +81,13 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Setup rust toolchain for concrete-cpu
uses: ./.github/workflows/setup_rust_toolchain_for_concrete_cpu
- name: Download cargo cache
uses: Swatinem/rust-cache@v2
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3
- name: Tests
run: |

View File

@@ -38,11 +38,6 @@ jobs:
echo "Request ID: ${{ inputs.request_id }}"
echo "User Inputs: ${{ inputs.user_inputs }}"
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- name: Set up GitHub environment
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -50,7 +45,7 @@ jobs:
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
@@ -65,7 +60,7 @@ jobs:
run: mkdir build
- name: Build wheel
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-bindings
with:
registry: ghcr.io
@@ -104,7 +99,7 @@ jobs:
deactivate
- name: Setup Python
uses: actions/setup-python@v5
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
with:
python-version: ${{ matrix.python-version }}

View File

@@ -0,0 +1,125 @@
name: Concrete Python Benchmark
on:
workflow_dispatch:
schedule:
- cron: "0 1 * * SAT"
env:
DOCKER_IMAGE: ghcr.io/zama-ai/concrete-compiler
GLIB_VER: 2_28
jobs:
setup-instance:
name: Setup Instance
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: m7i-cpu-bench
concrete-python-benchmarks:
name: Run Concrete Python Benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
fetch-depth: 0
- name: Benchmark
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-bindings
with:
registry: ghcr.io
image: ${{ env.DOCKER_IMAGE }}
username: ${{ secrets.GHCR_LOGIN }}
password: ${{ secrets.GHCR_PASSWORD }}
options: >-
-v ${{ github.workspace }}:/concrete
-v ${{ github.workspace }}/build:/build
-v ${{ env.SSH_AUTH_SOCK }}:/ssh.socket
-e SSH_AUTH_SOCK=/ssh.socket
${{ env.DOCKER_GPU_OPTION }}
shell: bash
run: |
set -e
rustup toolchain install nightly-2024-07-01
rm -rf /build/*
export PYTHON=${{ format('python{0}', matrix.python-version) }}
echo "Using $PYTHON"
dnf -y install graphviz graphviz-devel
cd /concrete/frontends/concrete-python
make PYTHON=$PYTHON venv
source .venv/bin/activate
cd /concrete/compilers/concrete-compiler/compiler
make BUILD_DIR=/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=ON Python3_EXECUTABLE=$(which python) python-bindings
echo "Debug: ccache statistics (after the build):"
ccache -s
cd /concrete/frontends/concrete-python
export COMPILER_BUILD_DIRECTORY="/build"
export PROGRESS_MACHINE_NAME="m7i.48xlarge"
make benchmark
make process-benchmark-results-for-grafana
deactivate
- name: Checkout Slab repo
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
echo "Computing HMac on results file"
SIGNATURE="$(slab/scripts/hmac_calculator.sh frontends/concrete-python/progress.processed.json '${{ secrets.JOB_SECRET }}')"
cd frontends/concrete-python
echo "Sending results to Slab..."
curl -v -k \
-H "Content-Type: application/json" \
-H "X-Slab-Repository: ${{ github.repository }}" \
-H "X-Slab-Command: store_data_v2" \
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-d @progress.processed.json \
${{ secrets.SLAB_URL }}
teardown-instance:
name: Teardown Instance
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, concrete-python-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
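For reference, a minimal sketch of the signing used by the "Send data to Slab" step above, assuming `hmac_calculator.sh` simply HMAC-SHA256s the raw results file with the job secret (the `hmac`, `sha2` and `hex` crates and the placeholder secret are assumptions for illustration):

```rust
use hmac::{Hmac, Mac};
use sha2::Sha256;

// HMAC-SHA256 over the raw file bytes, hex-encoded as the workflow expects.
fn sign(results_json: &[u8], job_secret: &[u8]) -> String {
    let mut mac = Hmac::<Sha256>::new_from_slice(job_secret)
        .expect("HMAC accepts keys of any length");
    mac.update(results_json);
    hex::encode(mac.finalize().into_bytes())
}

fn main() {
    let body = std::fs::read("progress.processed.json").expect("results file");
    let signature = sign(&body, b"job-secret"); // placeholder secret for illustration
    // Sent to Slab as: X-Hub-Signature-256: sha256=<signature>
    println!("sha256={signature}");
}
```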

View File

@@ -7,7 +7,7 @@ jobs:
Checks:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Pre-Commit Checks
run: |
sudo apt install -y graphviz libgraphviz-dev

View File

@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Get version from tag
run: |
@@ -34,7 +34,7 @@ jobs:
docker image build -t ${{ env.DOCKER_IMAGE_NAME }} --build-arg version=${{ env.VERSION }} -f ${{ env.DOCKER_FILE }} empty_context
- name: Login to Docker Hub
uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

View File

@@ -50,7 +50,7 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
fetch-depth: 0
@@ -73,7 +73,7 @@ jobs:
run: cp frontends/concrete-python/version.txt frontends/concrete-python/concrete/fhe/version.py
- name: Build wheel
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-bindings
with:
registry: ghcr.io
@@ -90,7 +90,7 @@ jobs:
run: |
set -e
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
rm -rf /build/*
export PYTHON=${{ format('python{0}', matrix.python-version) }}
@@ -116,7 +116,7 @@ jobs:
deactivate
- name: Upload wheel
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
path: frontends/concrete-python/dist/*manylinux*.whl
@@ -131,7 +131,7 @@ jobs:
runs-on: ${{ matrix.runs-on }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
fetch-depth: 0
@@ -193,7 +193,7 @@ jobs:
deactivate
- name: Upload wheel
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: ${{ format('wheel-{0}-{1}', matrix.python-version, matrix.runs-on) }}
path: frontends/concrete-python/dist/*macos*.whl
@@ -206,7 +206,7 @@ jobs:
outputs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
path: frontends/concrete-python/dist
pattern: wheel-*
@@ -230,20 +230,20 @@ jobs:
needs: [build-linux-x86, build-macos, provenance]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
- uses: actions/download-artifact@v4
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
path: wheels
pattern: 'wheel-*'
merge-multiple: true
- uses: actions/download-artifact@v4
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
pattern: '*.intoto.jsonl'
# When building a new tag, create a new draft release.
# When building a new public tag, create a new draft release.
- name: create draft release
if: ${{ env.RELEASE_TYPE == 'public' || env.RELEASE_TYPE == 'nightly' }}
if: ${{ env.RELEASE_TYPE == 'public'}}
run: |
export TAG=$(git describe --tags --abbrev=0)
echo $TAG
@@ -278,7 +278,7 @@ jobs:
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/zama-ai/concrete/actions/workflows/concrete_python_push_docker_image.yml/dispatches \
-d "{\"ref\": \"$TAG\", \"inputs\": {\"tag\":\"$TAG\"}}"
-d "{\"ref\": \"$TAG\", \"inputs\": {\"tag\":\"v$TAG\"}}"
test-linux-x86:
needs: [build-linux-x86]
@@ -289,16 +289,16 @@ jobs:
runs-on: ${{ github.event.inputs.runner_name }}
steps:
- name: Setup Python
uses: actions/setup-python@v5
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
with:
python-version: ${{ matrix.python-version }}
- name: Download wheels
uses: actions/download-artifact@v4
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
path: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
- name: Checkout the repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
path: repo
@@ -339,12 +339,12 @@ jobs:
runs-on: ${{ matrix.runs-on }}
steps:
- name: Download wheels
uses: actions/download-artifact@v4
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: ${{ format('wheel-{0}-{1}', matrix.python-version, matrix.runs-on) }}
path: ${{ format('wheel-{0}-{1}', matrix.python-version, matrix.runs-on) }}
- name: Checkout the repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
path: repo

View File

@@ -51,7 +51,7 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
fetch-depth: 0
@@ -74,7 +74,7 @@ jobs:
run: cp frontends/concrete-python/version.txt frontends/concrete-python/concrete/fhe/version.py
- name: Build wheel
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-bindings
with:
registry: ghcr.io
@@ -90,7 +90,7 @@ jobs:
run: |
set -e
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
rm -rf /build/*
export PYTHON=${{ format('python{0}', matrix.python-version) }}
@@ -104,7 +104,7 @@ jobs:
cd /concrete/compilers/concrete-compiler/compiler
make BUILD_DIR=/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=OFF Python3_EXECUTABLE=$(which python) \
CUDA_SUPPORT=ON CUDA_PATH=${{ env.CUDA_PATH }} python-bindings
CUDA_SUPPORT=ON TIMING_ENABLED=ON CUDA_PATH=${{ env.CUDA_PATH }} python-bindings
echo "Debug: ccache statistics (after the build):"
ccache -s
@@ -117,7 +117,7 @@ jobs:
deactivate
- name: Upload wheel
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
path: frontends/concrete-python/dist/*manylinux*.whl
@@ -126,10 +126,12 @@ jobs:
push:
needs: [build-linux-x86]
runs-on: ubuntu-latest
outputs:
wheel_version: ${{ steps.version.outputs.wheel_version }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/download-artifact@v4
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
path: wheels
merge-multiple: true
@@ -155,10 +157,17 @@ jobs:
# update indexes and invalidate cloudfront cache
python .github/workflows/scripts/s3_update_html_indexes.py
- name: Output Wheel Version
id: version
run: |
export VERSION=`ls ./wheels/*manylinux* | head -n1 | cut -d "-" -f2`
echo "VERSION=$VERSION"
echo "wheel_version=$VERSION" >> "$GITHUB_OUTPUT"
test-gpu-wheel:
needs: [push]
uses: ./.github/workflows/start_slab.yml
secrets: inherit
with:
command: concrete-python-test-gpu-wheel
user_inputs: "TODO"
user_inputs: "${{ needs.push.outputs.wheel_version }}"

View File

@@ -35,16 +35,15 @@ jobs:
runs-on: ${{ github.event.inputs.runner_name }}
steps:
- name: Setup Python
uses: actions/setup-python@v5
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
with:
python-version: ${{ matrix.python-version }}
# TODO: specify CP version
- name: Install CP
run: pip install --pre --extra-index-url https://pypi.zama.ai/gpu/ concrete-python
run: pip install --pre --extra-index-url https://pypi.zama.ai/gpu/ "concrete-python==${{ env.CP_VERSION }}"
- name: Checkout the repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
path: repo

View File

@@ -21,13 +21,8 @@ jobs:
runs-on: ${{ matrix.machine }}
steps:
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
@@ -41,7 +36,7 @@ jobs:
- name: Cache Compilation (push)
if: github.event_name == 'push'
uses: actions/cache@v3
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: /Users/runner/Library/Caches/ccache
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.sha }}
@@ -50,7 +45,7 @@ jobs:
- name: Cache Compilation (pull_request)
if: github.event_name == 'pull_request'
uses: actions/cache@v3
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: /Users/runner/Library/Caches/ccache
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.event.pull_request.base.sha }}

View File

@@ -42,11 +42,6 @@ jobs:
echo "Type: ${{ inputs.instance_type }}"
echo "Request ID: ${{ inputs.request_id }}"
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- name: Set up GitHub environment
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -54,7 +49,7 @@ jobs:
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
@@ -66,7 +61,7 @@ jobs:
uses: ./.github/workflows/setup_rust_toolchain_for_concrete_cpu
- name: Build bindings
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
if: ${{ !contains(inputs.instance_type, 'p3') }}
id: build-compiler-bindings
with:
@@ -81,7 +76,7 @@ jobs:
-e SSH_AUTH_SOCK=/ssh.socket
shell: bash
run: |
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
set -e
rm -rf /build/*
@@ -98,7 +93,7 @@ jobs:
ccache -s
- name: Test
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
if: ${{ !contains(inputs.instance_type, 'p3') }}
with:
registry: ghcr.io
@@ -124,7 +119,7 @@ jobs:
KEY_CACHE_DIRECTORY=./KeySetCache make pytest
- name: Build bindings gpu
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
if: ${{ contains(inputs.instance_type, 'p3') }}
id: build-compiler-bindings-gpu
with:
@@ -155,7 +150,7 @@ jobs:
ccache -s
- name: Test gpu
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
if: ${{ contains(inputs.instance_type, 'p3') }}
with:
registry: ghcr.io

View File

@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Linelint
uses: fernandrone/linelint@0.0.4
id: linelint

View File

@@ -39,20 +39,20 @@ jobs:
push-main: ${{ steps.github.outputs.push-main }}
steps:
- name: Checkout the repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
- name: Get changed files in the concrete-compiler directory
id: compiler
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./compilers/concrete-compiler/**
- name: Get changed files for concrete-optimizer
id: optimizer
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: |
./compilers/concrete-optimizer/**
@@ -60,43 +60,43 @@ jobs:
- name: Get changed files in the concrete-cpu directory
id: concrete-cpu
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./backends/concrete-cpu/implementation/**
- name: Get changed files in the concrete-python directory
id: concrete-python
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./frontends/concrete-python/**
- name: Check if compiler_build_and_test_cpu workflow has changed
id: concrete-compiler-cpu-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_build_and_test_cpu.yml
- name: Check if compiler_build_and_test_gpu workflow has changed
id: concrete-compiler-gpu-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_build_and_test_gpu.yml
- name: Check if compiler_format_and_linting.yml workflow has changed
id: concrete-compiler-format-and-linting-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_format_and_linting.yml
- name: Check if compiler_macos_build_and_test workflow has changed
id: concrete-compiler-macos-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_macos_build_and_test.yml
- name: Check if compiler_publish_docker_images workflow has changed
id: concrete-compiler-docker-images-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: |
./.github/workflows/compiler_publish_docker_images.yml
@@ -104,31 +104,31 @@ jobs:
- name: Check if concrete_cpu_test workflow has changed
id: concrete-cpu-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/concrete_cpu_test.yml
- name: Check if concrete_python_checks workflow has changed
id: concrete-python-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/concrete_python_checks.yml
- name: Check if optimizer workflow has changed
id: concrete-optimizer-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/optimizer.yml
- name: Get changed files in the concrete-cpu directory
id: concrete-cpu-api
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./backends/concrete-cpu/implementation/include/**
- name: Get changed files in the concrete-cuda directory
id: concrete-cuda-api
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./backends/concrete-cuda/implementation/include/**
@@ -252,7 +252,7 @@ jobs:
# Concrete-ML tests #############################
concrete-ml-tests-linux:
needs: file-change
if: needs.file-change.outputs.concrete-python == 'true' || needs.file-change.outputs.push-main
if: needs.file-change.outputs.concrete-python == 'true' || needs.file-change.outputs.compiler == 'true' || needs.file-change.outputs.push-main
uses: ./.github/workflows/start_slab.yml
secrets: inherit
with:

View File

@@ -4,6 +4,7 @@ on:
pull_request:
paths:
- '**.md'
- .github/workflows/markdown_link_check.yml
push:
branches:
- main
@@ -13,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- uses: gaurav-nelson/github-action-markdown-link-check@v1
- uses: gaurav-nelson/github-action-markdown-link-check@5c5dfc0ac2e225883c0e5f03a85311ec2830d368 # v1
with:
use-quiet-mode: 'yes'
use-verbose-mode: 'yes'

View File

@@ -46,13 +46,8 @@ jobs:
run: |
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
# SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
- name: Fetch submodules
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
submodules: recursive
@@ -86,7 +81,7 @@ jobs:
make BINDINGS_PYTHON_ENABLED=OFF ML_BENCH_SUBSET_ID=${{ inputs.matrix_item }} run-mlbench-subset
- name: Upload raw results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: ${{ github.sha }}_raw
path: compiler/benchmarks_results.json
@@ -105,13 +100,13 @@ jobs:
--bench-date "${{ env.BENCH_DATE }}"
- name: Upload parsed results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: ${{ github.sha }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: zama-ai/slab
path: slab

View File

@@ -24,7 +24,7 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: "Setup"
uses: ./.github/workflows/optimizer_setup
@@ -56,7 +56,7 @@ jobs:
benchmarks:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: "Setup"
uses: ./.github/workflows/optimizer_setup
@@ -73,7 +73,7 @@ jobs:
# for artifacts restrictions see https://github.com/actions/download-artifact/issues/3
# for cache restrictions see https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache
# and https://github.com/actions/cache/issues/692
uses: dawidd6/action-download-artifact@v5
uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow_conclusion: ""
@@ -83,7 +83,7 @@ jobs:
path: ./benchmark
- name: Save benchmark result to file
uses: benchmark-action/github-action-benchmark@v1
uses: benchmark-action/github-action-benchmark@4de1bed97a47495fc4c5404952da0499e31f5c29 # v1.20.3
with:
tool: 'cargo'
output-file-path: ./compilers/concrete-optimizer/bench_result.txt
@@ -96,7 +96,7 @@ jobs:
summary-always: true
- name: Upload benchmark data
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
path: ./benchmark
name: ${{ runner.os }}-benchmark

View File

@@ -7,15 +7,9 @@ runs:
using: "composite"
steps:
- name: Rust install
uses: actions-rs/toolchain@v1
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:
toolchain: stable
- name: Download cargo cache
uses: Swatinem/rust-cache@v2
# A SSH private key is required as some dependencies are from private repos
- name: Set ssh keys
uses: webfactory/ssh-agent@v0.6.0
with:
ssh-private-key: ${{ inputs.ssh_private_key }}
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3

View File

@@ -7,7 +7,7 @@ jobs:
refresh:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v3
- name: Update index and invalidate cache
env:

View File

@@ -2,7 +2,7 @@ runs:
using: "composite"
steps:
- name: Install rust
uses: actions-rs/toolchain@v1
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:
toolchain: nightly-2024-01-31
toolchain: nightly-2024-07-01
override: true

View File

@@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout concrete
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
@@ -39,7 +39,7 @@ jobs:
echo "GIT_REF=${{ github.ref_name }}" >> $GITHUB_ENV
- name: Checkout Slab repo
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: zama-ai/slab
path: slab

View File

@@ -129,7 +129,7 @@ assert result == add(2, 6)
- [[Video tutorial] How To Get Started With Concrete - Zama's Fully Homomorphic Encryption Compiler](https://www.zama.ai/post/how-to-started-with-concrete-zama-fully-homomorphic-encryption-compiler)
- [The Encrypted Game of Life in Python Using Concrete](https://www.zama.ai/post/the-encrypted-game-of-life-using-concrete-python)
- [Encrypted Key-value Database Using Homomorphic Encryption](https://www.zama.ai/post/encrypted-key-value-database-using-homomorphic-encryption)
- [SHA-256 Implementation Using Concrete](https://github.com/zama-ai/concrete/blob/main/docs/application-tutorial/sha256.ipynb)
- [SHA-256 Implementation Using Concrete](https://github.com/zama-ai/concrete/blob/main/frontends/concrete-python/examples/sha256/sha256.ipynb)
*Explore more useful resources in [Concrete tutorials](https://docs.zama.ai/concrete/v/main-1/tutorials/see-all-tutorials) and [Awesome Zama repo](https://github.com/zama-ai/awesome-zama?tab=readme-ov-file#concrete). If you have built awesome projects using Concrete, please let us know and we will be happy to showcase them here!*

View File

@@ -45,6 +45,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -59,9 +68,9 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "bytemuck"
version = "1.13.1"
version = "1.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
[[package]]
name = "cast"
@@ -170,14 +179,14 @@ dependencies = [
"cbindgen",
"concrete-cpu-noise-model",
"concrete-csprng",
"concrete-fft 0.2.1",
"concrete-fft",
"concrete-security-curves",
"criterion",
"dyn-stack",
"libc",
"num-complex",
"once_cell",
"pulp 0.10.4",
"pulp",
"rayon",
"readonly",
"tfhe",
@@ -193,9 +202,9 @@ dependencies = [
[[package]]
name = "concrete-csprng"
version = "0.4.0"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c7080e711c39c3dda834604f7c31f4634a38b8dddeb9f24db5d7801751415ad"
checksum = "90518357249582c16a6b64d7410243dfb3109d5bf0ad1665c058c9a59f2fc4cc"
dependencies = [
"aes",
"libc",
@@ -204,29 +213,26 @@ dependencies = [
[[package]]
name = "concrete-fft"
version = "0.2.1"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ab720f85aa80be9d78fba5a941458ec50d97c7856aced76707f0bf227a0c6ca"
checksum = "3144f883422ee22c65d4f408c11b3406513eadc0d50b3d65bfadcb97852817e0"
dependencies = [
"aligned-vec",
"bytemuck",
"dyn-stack",
"num-complex",
"pulp 0.11.11",
"pulp",
"serde",
]
[[package]]
name = "concrete-fft"
version = "0.3.0"
name = "concrete-ntt"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb823cf0c7a83ac8f7e38bcda73d754fb8f57797f342cdd7c20a61a1db8b058"
checksum = "b4f4643dbd5de069e099122ae6c2bbd3db70d69ffec348dfc228448d635f949e"
dependencies = [
"aligned-vec",
"bytemuck",
"dyn-stack",
"num-complex",
"pulp 0.11.11",
"serde",
"pulp",
]
[[package]]
@@ -387,6 +393,17 @@ dependencies = [
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "half"
version = "1.8.2"
@@ -500,9 +517,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.147"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "linux-raw-sys"
@@ -527,9 +550,9 @@ dependencies = [
[[package]]
name = "num-complex"
version = "0.4.3"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d"
checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6"
dependencies = [
"bytemuck",
"num-traits",
@@ -573,6 +596,12 @@ version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
[[package]]
name = "paste"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "plotters"
version = "0.3.5"
@@ -612,30 +641,14 @@ dependencies = [
[[package]]
name = "pulp"
version = "0.10.4"
version = "0.18.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284c392c810680912400c6f70879a8cde404344db6b68ff52cc3990c020324d1"
dependencies = [
"bytemuck",
]
[[package]]
name = "pulp"
version = "0.11.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "866e8018d6397b0717100dd4a7948fc8cbc8c4b8ce3e39e98a0e1e878d3ba925"
dependencies = [
"bytemuck",
]
[[package]]
name = "pulp"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50ff10f8f3224a1cab64371fbab0fff9b9cb5892076eb9032dc825c2e50894e4"
checksum = "0ec8d02258294f59e4e223b41ad7e81c874aa6b15bc4ced9ba3965826da0eed5"
dependencies = [
"bytemuck",
"libm",
"num-complex",
"reborrow",
]
[[package]]
@@ -647,6 +660,15 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.7.0"
@@ -835,19 +857,47 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
[[package]]
name = "tfhe"
version = "0.4.1"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9c5cc5c4a1bd0bcfc2e7a3e83a8851dab02ff59d63ca8bc17da0e069498f71"
checksum = "273f968d322dd02cd76a51fe8e2312b5807631d33e99ea06ccfd8034d558c562"
dependencies = [
"aligned-vec",
"bincode",
"bytemuck",
"concrete-csprng",
"concrete-fft 0.3.0",
"concrete-fft",
"concrete-ntt",
"dyn-stack",
"itertools 0.11.0",
"pulp 0.13.2",
"paste",
"pulp",
"rand_core",
"rayon",
"serde",
"tfhe-versionable",
]
[[package]]
name = "tfhe-versionable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7977829d8b2d59a16d9780ffbf8e4084a8d67f32c0e557b647136d094d391b2"
dependencies = [
"aligned-vec",
"num-complex",
"serde",
"tfhe-versionable-derive",
]
[[package]]
name = "tfhe-versionable-derive"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "754aede9c522d81d852b46714d42137d307f418b8b6eed4db03d1466080f4f9d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.22",
]
[[package]]
@@ -897,6 +947,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.87"

View File

@@ -10,18 +10,18 @@ crate-type = ["lib", "staticlib"]
[dependencies]
concrete-csprng = { version = "0.4", optional = true, features = [
concrete-csprng = { version = "0.4.1", optional = true, features = [
"generator_fallback",
] }
concrete-cpu-noise-model = { path = "../noise-model/" }
concrete-security-curves = { path = "../../../tools/parameter-curves/concrete-security-curves-rust" }
libc = { version = "0.2", default-features = false }
pulp = { version = "0.10", default-features = false }
pulp = { version = "0.18.8", default-features = false }
dyn-stack = { version = "0.9", default-features = false }
readonly = "0.2"
aligned-vec = { version = "0.5", default-features = false }
concrete-fft = { version = "0.2.1", default-features = false }
bytemuck = "1.12"
concrete-fft = { version = "0.4.1", default-features = false }
bytemuck = "1.14.3"
num-complex = { version = "0.4", default-features = false, features = [
"bytemuck",
] }
@@ -29,16 +29,16 @@ num-complex = { version = "0.4", default-features = false, features = [
rayon = { version = "1.6", optional = true }
once_cell = { version = "1.16", optional = true }
tfhe = { version = "0.4", features = [] }
tfhe = { version = "0.7", features = [] }
[target.x86_64-unknown-unix-gnu.dependencies]
tfhe = { version = "0.4", features = ["x86_64-unix"] }
tfhe = { version = "0.7", features = ["x86_64-unix"] }
[target.aarch64-unknown-unix-gnu.dependencies]
tfhe = { version = "0.4", features = ["aarch64-unix"] }
tfhe = { version = "0.7", features = ["aarch64-unix"] }
[target.x86_64-pc-windows-gnu.dependencies]
tfhe = { version = "0.4", features = ["x86_64"] }
tfhe = { version = "0.7", features = ["x86_64"] }
[features]
default = ["parallel", "std", "csprng"]

View File

@@ -73,14 +73,14 @@ pub unsafe extern "C" fn concrete_cpu_init_lwe_bootstrap_key_u64(
&lwe_sk,
&glwe_sk,
&mut bsk,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
),
Parallelism::Rayon => par_generate_lwe_bootstrap_key(
&lwe_sk,
&glwe_sk,
&mut bsk,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
),
}
@@ -151,14 +151,14 @@ pub unsafe extern "C" fn concrete_cpu_init_seeded_lwe_bootstrap_key_u64(
&lwe_sk,
&glwe_sk,
&mut bsk,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
seeder,
),
Parallelism::Rayon => par_generate_seeded_lwe_bootstrap_key(
&lwe_sk,
&glwe_sk,
&mut bsk,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
seeder,
),
}
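The hunks above (and the analogous ones in the keyswitch, encryption and circuit-bootstrap files below) adapt to the TFHE-rs 0.7 API, where key generation takes a noise distribution rather than a bare variance. A minimal sketch of the adapter expression used throughout this diff; the import paths are an assumption based on the usual `tfhe::core_crypto` layout, while the calls themselves mirror the replacement lines:

```rust
use tfhe::core_crypto::commons::dispersion::Variance;
use tfhe::core_crypto::commons::math::random::Gaussian;

// Zero-mean Gaussian with the requested dispersion, matching
// `Gaussian::from_dispersion_parameter(Variance::from_variance(v), 0.0)`.
fn noise_distribution(variance: f64) -> Gaussian<f64> {
    Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0)
}
```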

View File

@@ -53,7 +53,7 @@ pub unsafe extern "C" fn concrete_cpu_init_lwe_keyswitch_key_u64(
&input_key,
&output_key,
&mut ksk,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
)
});
@@ -110,7 +110,7 @@ pub unsafe extern "C" fn concrete_cpu_init_seeded_lwe_keyswitch_key_u64(
&input_key,
&output_key,
&mut seeded_ksk,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
seeder,
)
});

View File

@@ -53,7 +53,7 @@ pub unsafe extern "C" fn concrete_cpu_encrypt_lwe_ciphertext_u64(
&lwe_sk,
&mut lwe_out,
Plaintext(input),
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
);
});
@@ -95,7 +95,7 @@ pub unsafe extern "C" fn concrete_cpu_encrypt_seeded_lwe_ciphertext_u64(
&lwe_sk,
&mut seeded_lwe_ciphertext,
Plaintext(input),
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
seeder,
);
*seeded_lwe_out = seeded_lwe_ciphertext.into_scalar();
@@ -145,7 +145,7 @@ pub unsafe extern "C" fn concrete_cpu_encrypt_ggsw_ciphertext_u64(
&glwe_sk,
&mut ggsw_out,
Plaintext(input),
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
);
});

View File

@@ -71,14 +71,14 @@ pub unsafe extern "C" fn concrete_cpu_init_lwe_circuit_bootstrap_private_functio
&mut fpksk_list,
&input_key,
&output_key,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
),
Parallelism::Rayon => par_generate_circuit_bootstrap_lwe_pfpksk_list(
&mut fpksk_list,
&input_key,
&output_key,
Variance::from_variance(variance),
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
),
}

View File

@@ -1,5 +1,4 @@
#![allow(clippy::missing_safety_doc, dead_code)]
#![cfg_attr(feature = "nightly", feature(stdsimd))]
#![cfg_attr(feature = "nightly", feature(avx512_target_feature))]
extern crate alloc;

View File

@@ -1,3 +1,4 @@
#include <err.h>
#include "bootstrap_amortized.cuh"
/*
@@ -85,6 +86,8 @@ void scratch_cuda_bootstrap_amortized_32(void *v_stream, uint32_t gpu_index,
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
@@ -141,6 +144,8 @@ void scratch_cuda_bootstrap_amortized_64(void *v_stream, uint32_t gpu_index,
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
@@ -216,6 +221,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
lwe_idx, max_shared_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
@@ -353,6 +360,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
lwe_idx, max_shared_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
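The added `default:` branches make the CUDA entry points fail loudly instead of silently falling through when an unsupported polynomial size is passed in. The same guard, sketched in Rust purely for illustration (`errx(EXIT_FAILURE, ...)` reports the message and exits with a failure status):

```rust
// Reject polynomial sizes outside the supported set, as the CUDA code now does.
fn check_polynomial_size(polynomial_size: u32) {
    const SUPPORTED: [u32; 7] = [256, 512, 1024, 2048, 4096, 8192, 16384];
    if !SUPPORTED.contains(&polynomial_size) {
        eprintln!(
            "polynomial size {polynomial_size} is not supported. Supported values are: 256, 512, 1024, 2048, 4096, 8192, 16384."
        );
        std::process::exit(1);
    }
}
```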

View File

@@ -1,3 +1,4 @@
#include <err.h>
#include "bootstrap_fast_low_latency.cuh"
#include "bootstrap_low_latency.cuh"
/*
@@ -100,6 +101,8 @@ uint64_t get_buffer_size_bootstrap_low_latency_64(
input_lwe_ciphertext_count, max_shared_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
return 0;
break;
}
@@ -244,6 +247,8 @@ void scratch_cuda_bootstrap_low_latency_32(
allocate_gpu_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
@@ -377,6 +382,8 @@ void scratch_cuda_bootstrap_low_latency_64(
allocate_gpu_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
@@ -527,6 +534,8 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
num_samples, num_lut_vectors, max_shared_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
@@ -744,7 +753,10 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
(uint64_t *)lwe_array_in, (double2 *)bootstrapping_key, pbs_buffer,
glwe_dimension, lwe_dimension, polynomial_size, base_log, level_count,
num_samples, num_lut_vectors, max_shared_memory);
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}

View File

@@ -1,3 +1,4 @@
#include <err.h>
#include "bootstrap_fast_multibit.cuh"
#include "bootstrap_multibit.cuh"
#include "bootstrap_multibit.h"
@@ -172,6 +173,8 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
}
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}
@@ -311,6 +314,8 @@ void scratch_cuda_multi_bit_pbs_64(
}
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
}

View File

@@ -8,6 +8,7 @@
#include "polynomial/polynomial.cuh"
#include <atomic>
#include <cstdint>
#include <err.h>
__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
int glwe_dimension,
@@ -235,6 +236,8 @@ void cuda_convert_lwe_bootstrap_key(double2 *dest, ST *src, void *v_stream,
}
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
@@ -446,6 +449,8 @@ void cuda_fourier_polynomial_mul(void *_input1, void *_input2, void *_output,
}
break;
default:
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
break;
}
cuda_drop_async(buffer, stream, gpu_index);

View File

@@ -31,20 +31,6 @@ pip install lazydocs
.venvtrash/bin/lazydocs --output-path="../docs/dev/api" --overview-file="README.md" --src-base-url="../../" --no-watermark concrete
cd -
# Add the files in the summary
FILES=$(cd docs && find dev/api -name "*.md")
TMP_FILE=$(mktemp /tmp/apidocs.XXXXXX)
rm -rf "$TMP_FILE"
touch "$TMP_FILE"
for f in $FILES
do
filename=$(echo "$f" | rev | cut -d '/' -f 1 | rev)
echo " * [$filename]($f)" >> "$TMP_FILE"
done
rm -rf "$FRESH_DIRECTORY"
# New files?

View File

@@ -206,6 +206,9 @@ then
git diff
fi
# Update the pandas files in CML
make update_encrypted_dataframe
# Launch CML tests with pytest (and ignore flaky ones)
# As compared to regular `make pytest`, known flaky errors from Concrete ML are simply ignored
# and coverage is disabled

View File

@@ -1,32 +1,42 @@
# This is the new version of Slab that handles multi backend providers.
[backend.aws.m7i-cpu-bench]
region = "eu-west-1"
image_id = "ami-002bdcd64b8472cf9" # Based on Ubuntu 22.4
instance_type = "m7i.48xlarge"
security_group = ["sg-0e55cc31dfda0d8a7", ]
[profile.m7i-cpu-bench]
region = "eu-west-1"
image_id = "ami-002bdcd64b8472cf9" # Based on Ubuntu 22.4
instance_type = "m7i.48xlarge"
security_group= ["sg-0e55cc31dfda0d8a7", ]
[profile.m7i-cpu-test]
region = "eu-west-1"
image_id = "ami-002bdcd64b8472cf9"
instance_type = "m7i.16xlarge"
security_group= ["sg-0e55cc31dfda0d8a7", ]
[profile.m7i-metal]
region = "eu-west-1"
image_id = "ami-002bdcd64b8472cf9"
instance_type = "m7i.metal-24xl"
security_group= ["sg-0e55cc31dfda0d8a7", ]
[profile.gpu-bench]
region = "us-east-1"
image_id = "ami-08e27480d79e82238"
instance_type = "p3.2xlarge"
subnet_id = "subnet-8123c9e7"
security_group= ["sg-0f8b52622a2669491", ]
security_group= ["sg-017afab1f328af917", ]
# Docker is well configured for testing inside Docker in this AMI
[profile.gpu-test]
region = "us-east-1"
image_id = "ami-0c4773f5626d919b6"
image_id = "ami-0257c6ad39f902b5e"
instance_type = "p3.2xlarge"
subnet_id = "subnet-8123c9e7"
security_group= ["sg-0f8b52622a2669491", ]
security_group= ["sg-017afab1f328af917", ]
# It has CUDA Driver (<=12.5) and Docker installed
[profile.gpu-test-ubuntu22]
@@ -34,13 +44,14 @@ region = "us-east-1"
image_id = "ami-05385e0c3c574621f"
instance_type = "p3.2xlarge"
subnet_id = "subnet-8123c9e7"
security_group= ["sg-0f8b52622a2669491", ]
security_group= ["sg-017afab1f328af917", ]
[profile.slurm-cluster]
region = "eu-west-3"
image_id = "ami-0bb5bb9cb747b5ddd"
instance_id = "i-0e5ae2a14134d6275"
instance_type = "m6i.8xlarge"
security_group= ["sg-02dd8470fa845f31b", ]
#################################################
# Compiler commands
@@ -121,5 +132,5 @@ check_run_name = "Concrete Python Release (GPU)"
[command.concrete-python-test-gpu-wheel]
workflow = "concrete_python_test_gpu_wheel.yml"
profile = "gpu-test-ubuntu22"
profile = "gpu-test"
check_run_name = "Concrete Python Test GPU Wheel"

View File

@@ -136,7 +136,7 @@ ExternalProject_Add(
DOWNLOAD_COMMAND ""
CONFIGURE_COMMAND "" OUTPUT "${CONCRETE_CPU_STATIC_LIB}"
BUILD_ALWAYS true
BUILD_COMMAND cargo +nightly-2024-01-31 build --release --features=nightly
BUILD_COMMAND cargo +nightly-2024-07-01 build --release --features=nightly
BINARY_DIR "${CONCRETE_CPU_DIR}"
INSTALL_COMMAND ""
LOG_BUILD ON

View File

@@ -329,19 +329,9 @@ run-end-to-end-tests-gpu: build-end-to-end-test generate-gpu-tests
--backend=gpu \
$(FIXTURE_GPU_DIR)/*.yaml
## end-to-end-dataflow-tests
build-end-to-end-dataflow-tests: build-initialized
cmake --build $(BUILD_DIR) --target end_to_end_jit_auto_parallelization
cmake --build $(BUILD_DIR) --target end_to_end_jit_distributed
cmake --build $(BUILD_DIR) --target end_to_end_jit_aes_short
run-end-to-end-dataflow-tests: build-end-to-end-dataflow-tests
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_auto_parallelization
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_distributed
## end-to-end-distributed-tests
run-end-to-end-distributed-tests: $(GTEST_PARALLEL_PY) build-end-to-end-tests generate-cpu-tests
srun -n4 -c8 --kill-on-bad-exit=1 $(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_distributed
srun -n4 -c8 --kill-on-bad-exit=1 $(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_test \
--optimizer-strategy=dag-mono --dataflow-parallelize=1 \
$(FIXTURE_CPU_DIR)/*round*.yaml $(FIXTURE_CPU_DIR)/*relu*.yaml $(FIXTURE_CPU_DIR)/*linalg*.yaml
@@ -562,8 +552,6 @@ FORCE:
run-unit-tests \
run-python-tests \
build-end-to-end-tests \
build-end-to-end-dataflow-tests \
run-end-to-end-dataflow-tests \
run-random-end-to-end-tests-for-each-options \
opt \
mlir-opt \

View File

@@ -148,6 +148,9 @@ You can create a tarball containing libs, bins, and include files for the tools
### Build the Python Package
> [!IMPORTANT]
> The wheel built in the following steps is for `concrete-compiler` (which doesn't have the frontend layer) and not `concrete-python`. If you are interested in the `concrete-python` package, then you should build it from [here](https://github.com/zama-ai/concrete/tree/main/frontends/concrete-python) instead.
Currently supported platforms:
- Linux x86_64 for python 3.8, 3.9, 3.10, and 3.11

View File

@@ -19,7 +19,7 @@ include "concretelang/Dialect/FHE/Interfaces/FHEInterfaces.td"
class FHE_Op<string mnemonic, list<Trait> traits = []> :
Op<FHE_Dialect, mnemonic, traits>;
def FHE_ZeroEintOp : FHE_Op<"zero", [Pure, ConstantNoise]> {
def FHE_ZeroEintOp : FHE_Op<"zero", [Pure, ZeroNoise]> {
let summary = "Returns a trivial encrypted integer of 0";
let description = [{
@@ -34,7 +34,7 @@ def FHE_ZeroEintOp : FHE_Op<"zero", [Pure, ConstantNoise]> {
let results = (outs FHE_AnyEncryptedInteger:$out);
}
def FHE_ZeroTensorOp : FHE_Op<"zero_tensor", [Pure, ConstantNoise]> {
def FHE_ZeroTensorOp : FHE_Op<"zero_tensor", [Pure, ZeroNoise]> {
let summary = "Creates a new tensor with all elements initialized to an encrypted zero.";
let description = [{
@@ -52,7 +52,7 @@ def FHE_ZeroTensorOp : FHE_Op<"zero_tensor", [Pure, ConstantNoise]> {
let results = (outs Type<And<[TensorOf<[FHE_AnyEncryptedInteger]>.predicate, HasStaticShapePred]>>:$tensor);
}
def FHE_AddEintIntOp : FHE_Op<"add_eint_int", [Pure, BinaryEintInt, DeclareOpInterfaceMethods<Binary>]> {
def FHE_AddEintIntOp : FHE_Op<"add_eint_int", [Pure, BinaryEintInt, AdditiveNoise, DeclareOpInterfaceMethods<Binary>]> {
let summary = "Adds an encrypted integer and a clear integer";
let description = [{
@@ -85,7 +85,7 @@ def FHE_AddEintIntOp : FHE_Op<"add_eint_int", [Pure, BinaryEintInt, DeclareOpInt
let hasFolder = 1;
}
def FHE_AddEintOp : FHE_Op<"add_eint", [Pure, BinaryEint, DeclareOpInterfaceMethods<BinaryEint>]> {
def FHE_AddEintOp : FHE_Op<"add_eint", [Pure, BinaryEint, AdditiveNoise, DeclareOpInterfaceMethods<BinaryEint>]> {
let summary = "Adds two encrypted integers";
let description = [{
@@ -117,7 +117,7 @@ def FHE_AddEintOp : FHE_Op<"add_eint", [Pure, BinaryEint, DeclareOpInterfaceMeth
let hasVerifier = 1;
}
def FHE_SubIntEintOp : FHE_Op<"sub_int_eint", [Pure, BinaryIntEint]> {
def FHE_SubIntEintOp : FHE_Op<"sub_int_eint", [Pure, BinaryIntEint, AdditiveNoise]> {
let summary = "Subtract an encrypted integer from a clear integer";
let description = [{
@@ -149,7 +149,7 @@ def FHE_SubIntEintOp : FHE_Op<"sub_int_eint", [Pure, BinaryIntEint]> {
let hasVerifier = 1;
}
def FHE_SubEintIntOp : FHE_Op<"sub_eint_int", [Pure, BinaryEintInt, DeclareOpInterfaceMethods<Binary>]> {
def FHE_SubEintIntOp : FHE_Op<"sub_eint_int", [Pure, BinaryEintInt, AdditiveNoise, DeclareOpInterfaceMethods<Binary>]> {
let summary = "Subtract a clear integer from an encrypted integer";
let description = [{
@@ -182,7 +182,7 @@ def FHE_SubEintIntOp : FHE_Op<"sub_eint_int", [Pure, BinaryEintInt, DeclareOpInt
let hasFolder = 1;
}
def FHE_SubEintOp : FHE_Op<"sub_eint", [Pure, BinaryEint, DeclareOpInterfaceMethods<BinaryEint>]> {
def FHE_SubEintOp : FHE_Op<"sub_eint", [Pure, BinaryEint, AdditiveNoise, DeclareOpInterfaceMethods<BinaryEint>]> {
let summary = "Subtract an encrypted integer from an encrypted integer";
let description = [{
@@ -214,7 +214,7 @@ def FHE_SubEintOp : FHE_Op<"sub_eint", [Pure, BinaryEint, DeclareOpInterfaceMeth
let hasVerifier = 1;
}
def FHE_NegEintOp : FHE_Op<"neg_eint", [Pure, UnaryEint, DeclareOpInterfaceMethods<UnaryEint>]> {
def FHE_NegEintOp : FHE_Op<"neg_eint", [Pure, UnaryEint, AdditiveNoise, DeclareOpInterfaceMethods<UnaryEint>]> {
let summary = "Negates an encrypted integer";

View File

@@ -37,6 +37,22 @@ def ConstantNoise : OpInterface<"ConstantNoise"> {
let cppNamespace = "mlir::concretelang::FHE";
}
def ZeroNoise : OpInterface<"ZeroNoise"> {
let description = [{
An operation that outputs a ciphertext with zero noise.
}];
let cppNamespace = "mlir::concretelang::FHE";
}
def AdditiveNoise : OpInterface<"AdditiveNoise"> {
let description = [{
An n-ary operation whose output noise is the unweighted sum of all input noises.
}];
let cppNamespace = "mlir::concretelang::FHE";
}
def UnaryEint : OpInterface<"UnaryEint"> {
let description = [{
A unary operation on scalars, with the operand encrypted.
@@ -63,7 +79,7 @@ def UnaryEint : OpInterface<"UnaryEint"> {
if (auto operandTy = dyn_cast<mlir::RankedTensorType>($_op->getOpOperand(0).get().getType())) {
return operandTy.getElementType();
} else return $_op->getOpOperand(0).get().getType();
}]>
}]>
];
}
@@ -124,8 +140,8 @@ def Binary : OpInterface<"Binary"> {
if (auto cstOp = llvm::dyn_cast_or_null<mlir::arith::ConstantOp>($_op->
getOpOperand(opNum).get().getDefiningOp()))
return cstOp->template getAttrOfType<mlir::DenseIntElementsAttr>("value").template getValues<llvm::APInt>();
else return {};
}]>,
else return {};
}]>,
];
}
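
The new `ZeroNoise` and `AdditiveNoise` interfaces let analysis passes treat whole families of ops uniformly instead of matching each op by name; the optimizer DAG construction and the MANP analysis further down in this diff dispatch on them with `llvm::isa`/`llvm::dyn_cast`. Below is a minimal sketch of that dispatch pattern; the helper `outputNoiseBound` and its fallback noise model are invented here purely for illustration and are not part of the change:

```cpp
#include "concretelang/Dialect/FHE/Interfaces/FHEInterfaces.h"
#include <llvm/ADT/ArrayRef.h>
#include <mlir/IR/Operation.h>
#include <numeric>

namespace FHE = mlir::concretelang::FHE;

// Hypothetical helper: derive an output noise bound from input bounds by
// dispatching on the noise interfaces an op implements.
static double outputNoiseBound(mlir::Operation *op,
                               llvm::ArrayRef<double> inputNoise) {
  if (llvm::isa<FHE::ZeroNoise>(op))
    return 0.0; // e.g. fhe.zero: trivial encryption carries no noise
  if (llvm::isa<FHE::AdditiveNoise>(op))
    // e.g. fhe.add_eint: unweighted sum of all input noises
    return std::accumulate(inputNoise.begin(), inputNoise.end(), 0.0);
  if (llvm::isa<FHE::ConstantNoise>(op))
    return 1.0; // constant noise, independent of the inputs
  // Other ops need their own model; fall back conservatively here.
  return inputNoise.empty() ? 1.0 : inputNoise.front();
}
```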

View File

@@ -1,2 +1,3 @@
add_subdirectory(Interfaces)
add_subdirectory(IR)
add_subdirectory(Transforms)

View File

@@ -0,0 +1,3 @@
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls -name Transforms)
add_public_tablegen_target(ConcretelangSDFGTransformsPassIncGen)

View File

@@ -0,0 +1,26 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#ifndef CONCRETELANG_SDFG_TRANSFORMS_PASS_H
#define CONCRETELANG_SDFG_TRANSFORMS_PASS_H
#include <mlir/Dialect/Func/IR/FuncOps.h>
#include <mlir/Dialect/Linalg/IR/Linalg.h>
#include <mlir/Dialect/MemRef/IR/MemRef.h>
#include <mlir/Dialect/SCF/IR/SCF.h>
#include <mlir/Pass/Pass.h>
#define GEN_PASS_CLASSES
#include <concretelang/Dialect/SDFG/Transforms/Passes.h.inc>
namespace mlir {
namespace concretelang {
std::unique_ptr<mlir::Pass> createSDFGBufferOwnershipPass();
} // namespace concretelang
} // namespace mlir
#endif

View File

@@ -0,0 +1,17 @@
#ifndef CONCRETELANG_SDFG_PASSES
#define CONCRETELANG_SDFG_PASSES
include "mlir/Pass/PassBase.td"
def SDFGBufferOwnership : Pass<"SDFGBufferOwnership", "mlir::ModuleOp"> {
let summary =
"Take ownership of data passed to SDFG operators.";
let description = [{ Since data passed to SDFG operators, in particular
PUT operations, may be consumed asynchronously, deallocation must be
handled by the runtime once it takes ownership. This pass removes
explicit deallocation calls where the data has no other uses
and makes copies otherwise, letting the runtime handle
deallocation when appropriate.}]; }
#endif
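
For context, here is a minimal sketch of how the new pass could be scheduled from C++, assuming only what the headers above declare (`createSDFGBufferOwnershipPass` in `mlir::concretelang`); where the pass actually sits in the real compilation pipeline is not shown in this diff:

```cpp
#include <concretelang/Dialect/SDFG/Transforms/Passes.h>
#include <llvm/Support/raw_ostream.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/Pass/PassManager.h>
#include <mlir/Support/LogicalResult.h>

// Run the ownership pass on a module before lowering SDFG put operations,
// so deallocations of buffers handed to the runtime are removed or replaced
// by copies.
mlir::LogicalResult runSDFGOwnership(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  pm.addPass(mlir::concretelang::createSDFGBufferOwnershipPass());
  if (mlir::failed(pm.run(module))) {
    llvm::errs() << "SDFGBufferOwnership failed\n";
    return mlir::failure();
  }
  return mlir::success();
}
```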

View File

@@ -25,6 +25,8 @@ bool _dfr_is_jit();
bool _dfr_is_root_node();
bool _dfr_use_omp();
bool _dfr_is_distributed();
void _dfr_run_remote_scheduler();
void _dfr_register_lib(void *dlh);
typedef enum _dfr_task_arg_type {
_DFR_TASK_ARG_BASE = 0,

View File

@@ -0,0 +1,28 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#ifndef CONCRETELANG_GPUDFG_HPP
#define CONCRETELANG_GPUDFG_HPP
#ifdef CONCRETELANG_CUDA_SUPPORT
#include "bootstrap.h"
#include "device.h"
#include "keyswitch.h"
#include "linear_algebra.h"
#endif
namespace mlir {
namespace concretelang {
namespace gpu_dfg {
bool check_cuda_device_available();
bool check_cuda_runtime_enabled();
} // namespace gpu_dfg
} // namespace concretelang
} // namespace mlir
#endif
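
A small sketch of how callers might combine the two checks exposed by this header; `canOffloadToGpu` is a hypothetical helper written for illustration, not part of the diff:

```cpp
#include "concretelang/Runtime/GPUDFG.hpp"
#include <iostream>

// GPU offloading requires both a CUDA-enabled build (non-GPU wheels return
// false from check_cuda_runtime_enabled) and a visible CUDA device.
bool canOffloadToGpu() {
  using namespace mlir::concretelang::gpu_dfg;
  if (!check_cuda_runtime_enabled()) {
    std::cerr << "CUDA runtime support not compiled in, using CPU\n";
    return false;
  }
  if (!check_cuda_device_available()) {
    std::cerr << "No CUDA device available or online, using CPU\n";
    return false;
  }
  return true;
}
```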

View File

@@ -109,10 +109,8 @@ struct RuntimeContextManager {
bool allocated = false;
bool lazy_key_transfer = false;
RuntimeContextManager(bool lazy = false) : lazy_key_transfer(lazy) {
context = nullptr;
_dfr_node_level_runtime_context_manager = this;
}
RuntimeContextManager(bool lazy = false)
: context(nullptr), lazy_key_transfer(lazy) {}
void setContext(void *ctx) {
assert(context == nullptr &&

View File

@@ -83,7 +83,8 @@ uint64_t stream_emulator_get_uint64(void *stream);
void *stream_emulator_make_memref_stream(const char *name, stream_type stype);
void stream_emulator_put_memref(void *stream, uint64_t *allocated,
uint64_t *aligned, uint64_t offset,
uint64_t size, uint64_t stride);
uint64_t size, uint64_t stride,
uint64_t data_ownership);
void stream_emulator_get_memref(void *stream, uint64_t *out_allocated,
uint64_t *out_aligned, uint64_t out_offset,
uint64_t out_size, uint64_t out_stride);
@@ -93,7 +94,8 @@ void *stream_emulator_make_memref_batch_stream(const char *name,
void stream_emulator_put_memref_batch(void *stream, uint64_t *allocated,
uint64_t *aligned, uint64_t offset,
uint64_t size0, uint64_t size1,
uint64_t stride0, uint64_t stride1);
uint64_t stride0, uint64_t stride1,
uint64_t data_ownership);
void stream_emulator_get_memref_batch(void *stream, uint64_t *out_allocated,
uint64_t *out_aligned,
uint64_t out_offset, uint64_t out_size0,

View File

@@ -10,40 +10,156 @@
#include <assert.h>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "concretelang/Runtime/DFRuntime.hpp"
#define TIME_UTIL_CLOCK CLOCK_MONOTONIC
namespace mlir {
namespace concretelang {
namespace time_util {
extern bool timing_enabled;
extern struct timespec timestamp;
} // namespace time_util
} // namespace concretelang
} // namespace mlir
static inline int timespec_diff(struct timespec *, const struct timespec *,
const struct timespec *);
#define CONCRETELANG_ENABLE_TIMING() \
do { \
assert(clock_gettime(TIME_UTIL_CLOCK, \
&mlir::concretelang::time_util::timestamp) == 0); \
char *env = getenv("CONCRETE_TIMING_ENABLED"); \
if (env != nullptr) \
if (!strncmp(env, "True", 4) || !strncmp(env, "true", 4) || \
!strncmp(env, "ON", 2) || !strncmp(env, "on", 2) || \
!strncmp(env, "1", 1)) \
mlir::concretelang::time_util::timing_enabled = true; \
} while (0)
#define BEGIN_TIME(p) \
do { \
assert(clock_gettime(TIME_UTIL_CLOCK, (p)) == 0); \
if (mlir::concretelang::time_util::timing_enabled) { \
assert(clock_gettime(TIME_UTIL_CLOCK, (p)) == 0); \
} \
} while (0)
#if CONCRETELANG_DATAFLOW_EXECUTION_ENABLED
#define END_TIME(p, m) \
do { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
<< " time : \t" << (p)->tv_sec << "." << (p)->tv_nsec \
<< " seconds.\n" \
<< std::flush; \
if (mlir::concretelang::time_util::timing_enabled) { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
struct timespec _timestamp_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
&mlir::concretelang::time_util::timestamp) == 0); \
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
<< "] -\t"; \
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
<< " time : \t" << time_in_seconds((p)) << " seconds.\n" \
<< std::flush; \
} \
} while (0)
#define END_TIME_C(p, m, c) \
do { \
if (mlir::concretelang::time_util::timing_enabled) { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
struct timespec _timestamp_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
&mlir::concretelang::time_util::timestamp) == 0); \
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
<< "] -\t"; \
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
<< " [" << (c) << "] time : \t" << time_in_seconds((p)) \
<< " seconds.\n" \
<< std::flush; \
} \
} while (0)
#define END_TIME_C_ACC(p, m, c, acc) \
do { \
if (mlir::concretelang::time_util::timing_enabled) { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
timespec_acc((acc), (p), (acc)); \
struct timespec _timestamp_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
&mlir::concretelang::time_util::timestamp) == 0); \
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
<< "] -\t"; \
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
<< " [" << (c) << "] time : \t" << time_in_seconds((p)) \
<< " (total : " << time_in_seconds((acc)) << " )" \
<< " seconds.\n" \
<< std::flush; \
} \
} while (0)
#else
#define END_TIME(p, m) \
do { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
std::cout << (m) << " time : \t" << (p)->tv_sec << "." << (p)->tv_nsec \
<< " seconds.\n" \
<< std::flush; \
if (mlir::concretelang::time_util::timing_enabled) { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
struct timespec _timestamp_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
&mlir::concretelang::time_util::timestamp) == 0); \
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
<< "] -\t"; \
std::cout << (m) << " time : \t" << time_in_seconds((p)) \
<< " seconds.\n" \
<< std::flush; \
} \
} while (0)
#define END_TIME_C(p, m, c) \
do { \
if (mlir::concretelang::time_util::timing_enabled) { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
struct timespec _timestamp_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
&mlir::concretelang::time_util::timestamp) == 0); \
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
<< "] -\t"; \
std::cout << (m) << " [" << (c) << "] time : \t" << time_in_seconds((p)) \
<< " seconds.\n" \
<< std::flush; \
} \
} while (0)
#define END_TIME_C_ACC(p, m, c, acc) \
do { \
if (mlir::concretelang::time_util::timing_enabled) { \
struct timespec _end_time_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
timespec_acc((acc), (p), (acc)); \
struct timespec _timestamp_tv; \
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
&mlir::concretelang::time_util::timestamp) == 0); \
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
<< "] -\t"; \
std::cout << (m) << " [" << (c) << "] time : \t" << time_in_seconds((p)) \
<< " (total : " << time_in_seconds((acc)) << " )" \
<< " seconds.\n" \
<< std::flush; \
} \
} while (0)
#endif
@@ -57,6 +173,13 @@ static inline double get_thread_cpu_time(void) {
return _t;
}
static inline double time_in_seconds(struct timespec *_tv) {
double _t;
_t = _tv->tv_sec;
_t += _tv->tv_nsec * 1e-9;
return _t;
}
static inline int timespec_diff(struct timespec *_result,
const struct timespec *_px,
const struct timespec *_py) {
@@ -86,10 +209,23 @@ static inline int timespec_diff(struct timespec *_result,
return _x.tv_sec < _y.tv_sec;
}
static inline void timespec_acc(struct timespec *_result,
const struct timespec *_px,
const struct timespec *_py) {
struct timespec _x, _y;
_x = *_px;
_y = *_py;
_result->tv_sec = _x.tv_sec + _y.tv_sec;
_result->tv_nsec = _x.tv_nsec + _y.tv_nsec;
}
#else // CONCRETELANG_TIMING_ENABLED
#define CONCRETELANG_ENABLE_TIMING()
#define BEGIN_TIME(p)
#define END_TIME(p, m)
#define END_TIME_C(p, m, c)
#define END_TIME_C_ACC(p, m, c, acc)
#endif // CONCRETELANG_TIMING_ENABLED
#endif // header guard
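
A sketch of a call site for these macros; the header path and the surrounding function are assumptions made here for illustration. Note the double gating: the macros expand to nothing unless the build defines `CONCRETELANG_TIMING_ENABLED`, and even then they only print when the `CONCRETE_TIMING_ENABLED` environment variable is set to a truthy value:

```cpp
#include <time.h>
// Assumed header name for the timing macros shown above.
#include "concretelang/Runtime/time_util.h"

void run_measured_section() {
  // Reads CONCRETE_TIMING_ENABLED once and records the reference timestamp
  // used for the "[Timing logs][...]" prefix.
  CONCRETELANG_ENABLE_TIMING();

  struct timespec t;
  BEGIN_TIME(&t);
  // ... work to be measured ...
  END_TIME(&t, "measured section"); // prints elapsed seconds when enabled
}
```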

View File

@@ -0,0 +1,21 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#ifndef CONCRETELANG_RUNTIME_UTILS_H
#define CONCRETELANG_RUNTIME_UTILS_H
#include "llvm/Support/TargetSelect.h"
namespace mlir {
namespace concretelang {
// Mainly a wrapper around some LLVM functions. The reason to have this wrapper is
// to avoid linking conflicts between the Python binary extension and LLVM.
void LLVMInitializeNativeTarget();
} // namespace concretelang
} // namespace mlir
#endif
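
A sketch of the intended call site, under the assumption (not shown in the diff) that JIT setup code calls this wrapper rather than LLVM's own initializer, so the Python extension and the compiler libraries resolve the same symbols; the header path is assumed:

```cpp
// Assumed header path for the declaration above.
#include "concretelang/Runtime/runtime_utils.h"

void prepareNativeTarget() {
  // Wrapper around LLVM's native-target initialization, kept behind this
  // function to avoid linking conflicts with the Python binary extension.
  mlir::concretelang::LLVMInitializeNativeTarget();
}
```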

View File

@@ -21,13 +21,11 @@ namespace concretelang {
namespace dfr {
struct WorkFunctionRegistry;
namespace {
static void *dl_handle;
static WorkFunctionRegistry *_dfr_node_level_work_function_registry;
} // namespace
extern WorkFunctionRegistry *_dfr_node_level_work_function_registry;
extern void *dl_handle;
struct WorkFunctionRegistry {
WorkFunctionRegistry() { _dfr_node_level_work_function_registry = this; }
WorkFunctionRegistry() = default;
wfnptr getWorkFunctionPointer(const std::string &name) {
std::lock_guard<std::mutex> guard(registry_guard);

View File

@@ -26,8 +26,6 @@ using concretelang::protocol::Message;
namespace mlir {
namespace concretelang {
bool getEmitGPUOption();
/// Compilation context that acts as the root owner of LLVM and MLIR
/// data structures directly and indirectly referenced by artefacts
/// produced by the `CompilerEngine`.

View File

@@ -23,6 +23,7 @@
#include <ostream>
#include <string>
#include <thread>
#include <unistd.h>
using concretelang::clientlib::ClientCircuit;
using concretelang::clientlib::ClientProgram;
@@ -220,37 +221,32 @@ private:
auto new_path = [=]() {
llvm::SmallString<0> outputPath;
llvm::sys::path::append(outputPath, rootFolder);
std::string uid = std::to_string(
std::hash<std::thread::id>()(std::this_thread::get_id()));
auto pid = getpid();
std::string uid = std::to_string(pid);
uid.append("-");
uid.append(std::to_string(std::rand()));
llvm::sys::path::append(outputPath, uid);
return std::string(outputPath);
};
// macOS sometimes fails to create new directories, so we have to retry a few
// times.
for (size_t i = 0; i < 5; i++) {
auto pathString = new_path();
auto ec = std::error_code();
llvm::errs() << "TestProgram: create temporary directory(" << pathString
<< ")\n";
if (!std::filesystem::create_directory(pathString, ec)) {
llvm::errs() << "TestProgram: fail to create temporary directory("
<< pathString << "), ";
if (ec) {
llvm::errs() << "already exists";
} else {
llvm::errs() << "error(" << ec.message() << ")";
}
auto pathString = new_path();
auto ec = std::error_code();
llvm::errs() << "TestProgram: create temporary directory(" << pathString
<< ")\n";
if (!std::filesystem::create_directory(pathString, ec)) {
llvm::errs() << "TestProgram: fail to create temporary directory("
<< pathString << "), ";
if (ec) {
llvm::errs() << "already exists";
} else {
llvm::errs() << "TestProgram: directory(" << pathString
<< ") successfully created\n";
return pathString;
llvm::errs() << "error(" << ec.message() << ")";
}
assert(false);
} else {
llvm::errs() << "TestProgram: directory(" << pathString
<< ") successfully created\n";
return pathString;
}
llvm::errs() << "Failed to create temp directory 5 times. Aborting...\n";
assert(false);
}
};

View File

@@ -12,6 +12,7 @@
#include "concretelang/Common/Keysets.h"
#include "concretelang/Dialect/FHE/IR/FHEOpsDialect.h.inc"
#include "concretelang/Runtime/DFRuntime.hpp"
#include "concretelang/Runtime/GPUDFG.hpp"
#include "concretelang/ServerLib/ServerLib.h"
#include "concretelang/Support/logging.h"
#include <llvm/Support/Debug.h>
@@ -462,6 +463,14 @@ void initDataflowParallelization() {
mlir::concretelang::dfr::_dfr_set_required(true);
}
bool checkGPURuntimeEnabled() {
return mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled();
}
bool checkCudaDeviceAvailable() {
return mlir::concretelang::gpu_dfg::check_cuda_device_available();
}
std::string roundTrip(const char *module) {
std::shared_ptr<mlir::concretelang::CompilationContext> ccx =
mlir::concretelang::CompilationContext::createShared();
@@ -673,6 +682,8 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
m.def("terminate_df_parallelization", &terminateDataflowParallelization);
m.def("init_df_parallelization", &initDataflowParallelization);
m.def("check_gpu_runtime_enabled", &checkGPURuntimeEnabled);
m.def("check_cuda_device_available", &checkCudaDeviceAvailable);
pybind11::enum_<mlir::concretelang::Backend>(m, "Backend")
.value("CPU", mlir::concretelang::Backend::CPU)
@@ -957,6 +968,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
::concretelang::serverlib::ServerLambda lambda,
::concretelang::clientlib::PublicArguments &publicArguments,
::concretelang::clientlib::EvaluationKeys &evaluationKeys) {
pybind11::gil_scoped_release release;
SignalGuard signalGuard;
return library_server_call(support, lambda, publicArguments,
evaluationKeys);
@@ -1238,6 +1250,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
::concretelang::clientlib::PublicArguments &publicArguments,
::concretelang::clientlib::EvaluationKeys &evaluationKeys) {
SignalGuard signalGuard;
pybind11::gil_scoped_release release;
auto keyset = evaluationKeys.keyset;
auto values = publicArguments.values;
GET_OR_THROW_RESULT(auto output, circuit.call(keyset, values));
@@ -1268,6 +1281,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
[](::concretelang::clientlib::ValueExporter &exporter,
size_t position, int64_t value) {
SignalGuard signalGuard;
pybind11::gil_scoped_release release;
auto info = exporter.circuit.getCircuitInfo()
.asReader()
@@ -1288,6 +1302,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
size_t position, std::vector<int64_t> values,
std::vector<int64_t> shape) {
SignalGuard signalGuard;
pybind11::gil_scoped_release release;
std::vector<size_t> dimensions(shape.begin(), shape.end());
auto info =
exporter.circuit.getCircuitInfo().asReader().getInputs()[position];
@@ -1363,6 +1378,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
size_t position,
::concretelang::clientlib::SharedScalarOrTensorData &value) {
SignalGuard signalGuard;
pybind11::gil_scoped_release release;
auto result =
decrypter.circuit.processOutput(value.value, position);

View File

@@ -8,6 +8,8 @@ import atexit
from mlir._mlir_libs._concretelang._compiler import (
terminate_df_parallelization as _terminate_df_parallelization,
init_df_parallelization as _init_df_parallelization,
check_gpu_runtime_enabled as _check_gpu_runtime_enabled,
check_cuda_device_available as _check_cuda_device_available,
)
from mlir._mlir_libs._concretelang._compiler import round_trip as _round_trip
from mlir._mlir_libs._concretelang._compiler import (
@@ -49,6 +51,18 @@ def init_dfr():
_init_df_parallelization()
def check_gpu_enabled() -> bool:
"""Check whether the compiler and runtime support GPU offloading.
GPU offloading is not always available, in particular in non-GPU wheels."""
return _check_gpu_runtime_enabled()
def check_gpu_available() -> bool:
"""Check whether a CUDA device is available and online."""
return _check_cuda_device_available()
# Cleanly terminate the dataflow runtime if it has been initialized
# (does nothing otherwise)
atexit.register(_terminate_df_parallelization)

View File

@@ -403,13 +403,19 @@ struct LowerSDFGPut
"SDFG streams only support memrefs and integers.");
funcName = stream_emulator_put_uint64;
}
// Add the data ownership flag, which indicates whether the put operation
// takes ownership of the memref data; it is set to 0 by default.
mlir::SmallVector<mlir::Value> operands(putOp->getOperands());
operands.push_back(rewriter.create<mlir::arith::ConstantOp>(
putOp.getLoc(), rewriter.getI64IntegerAttr(0)));
if (insertGenericForwardDeclaration(putOp, rewriter, funcName,
putOp->getOperandTypes(),
mlir::ValueRange{operands}.getTypes(),
putOp->getResultTypes())
.failed())
return ::mlir::failure();
mlir::SmallVector<mlir::Value> newOps;
castDynamicTensorOps(putOp, rewriter, putOp->getOperands(), newOps);
castDynamicTensorOps(putOp, rewriter, operands, newOps);
rewriter.replaceOpWithNewOp<mlir::func::CallOp>(
putOp, funcName, putOp->getResultTypes(), newOps);
return ::mlir::success();

View File

@@ -3,6 +3,7 @@
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#include "concretelang/Dialect/Tracing/IR/TracingOps.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -686,6 +687,37 @@ struct ZeroTensorOpPattern
};
};
struct TraceCiphertextOpPattern
: public mlir::OpConversionPattern<Tracing::TraceCiphertextOp> {
TraceCiphertextOpPattern(mlir::MLIRContext *context,
mlir::TypeConverter &typeConverter)
: mlir::OpConversionPattern<Tracing::TraceCiphertextOp>(
typeConverter, context,
mlir::concretelang::DEFAULT_PATTERN_BENEFIT) {}
::mlir::LogicalResult
matchAndRewrite(Tracing::TraceCiphertextOp traceCiphertextOp,
Tracing::TraceCiphertextOp::Adaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
Tracing::TracePlaintextOp ptOp =
rewriter.replaceOpWithNewOp<Tracing::TracePlaintextOp>(
traceCiphertextOp, mlir::TypeRange{}, adaptor.getCiphertext());
if (auto msg = traceCiphertextOp.getMsg())
ptOp.setMsg(msg);
if (auto nmsb = traceCiphertextOp.getNmsb())
ptOp.setNmsb(nmsb);
auto inputWidth =
ptOp.getPlaintext().getType().cast<mlir::IntegerType>().getWidth();
ptOp->setAttr("input_width", rewriter.getI64IntegerAttr(inputWidth));
return ::mlir::success();
};
};
struct SimulateTFHEPass : public SimulateTFHEBase<SimulateTFHEPass> {
bool enableOverflowDetection;
SimulateTFHEPass(bool enableOverflowDetection)
@@ -704,7 +736,8 @@ void SimulateTFHEPass::runOnOperation() {
target.addLegalOp<mlir::func::CallOp, mlir::memref::GetGlobalOp,
mlir::memref::CastOp, mlir::bufferization::AllocTensorOp,
mlir::tensor::CastOp, mlir::LLVM::GlobalOp,
mlir::LLVM::AddressOfOp, mlir::LLVM::GEPOp>();
mlir::LLVM::AddressOfOp, mlir::LLVM::GEPOp,
Tracing::TracePlaintextOp>();
// Make sure that no ops from `TFHE` remain after the lowering
target.addIllegalDialect<TFHE::TFHEDialect>();
@@ -742,11 +775,11 @@ void SimulateTFHEPass::runOnOperation() {
mlir::tensor::InsertOp, mlir::tensor::InsertSliceOp,
mlir::tensor::ParallelInsertSliceOp, mlir::tensor::FromElementsOp,
mlir::tensor::ExpandShapeOp, mlir::tensor::CollapseShapeOp,
mlir::bufferization::AllocTensorOp, mlir::tensor::EmptyOp>(
[&](mlir::Operation *op) {
return converter.isLegal(op->getResultTypes()) &&
converter.isLegal(op->getOperandTypes());
});
mlir::bufferization::AllocTensorOp, mlir::tensor::EmptyOp,
Tracing::TraceCiphertextOp>([&](mlir::Operation *op) {
return converter.isLegal(op->getResultTypes()) &&
converter.isLegal(op->getOperandTypes());
});
// Make sure that no ops `linalg.generic` that have illegal types
target
.addDynamicallyLegalOp<mlir::linalg::GenericOp, mlir::tensor::GenerateOp>(
@@ -778,8 +811,8 @@ void SimulateTFHEPass::runOnOperation() {
patterns.insert<ZeroOpPattern, ZeroTensorOpPattern, KeySwitchGLWEOpPattern,
WopPBSGLWEOpPattern, EncodeLutForCrtWopPBSOpPattern,
EncodePlaintextWithCrtOpPattern, NegOpPattern>(&getContext(),
converter);
EncodePlaintextWithCrtOpPattern, NegOpPattern,
TraceCiphertextOpPattern>(&getContext(), converter);
patterns.insert<SubIntGLWEOpPattern>(&getContext());
// if overflow detection is enable, then rewrite to CAPI functions that

View File

@@ -6,11 +6,13 @@
#include <chrono>
#include <cmath>
#include <initializer_list>
#include <iostream>
#include <optional>
#include <vector>
#include "boost/outcome.h"
#include "concretelang/Dialect/FHE/Interfaces/FHEInterfaces.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
@@ -34,6 +36,9 @@
#define GEN_PASS_CLASSES
#include "concretelang/Dialect/FHE/Analysis/ConcreteOptimizer.h.inc"
using concrete_optimizer::utils::location_from_string;
using concrete_optimizer::utils::location_unknown;
namespace mlir {
namespace concretelang {
namespace optimizer {
@@ -110,7 +115,9 @@ struct FunctionToDag {
}
auto precision = fhe::utils::getEintPrecision(arg);
auto shape = getShape(arg);
auto opI = dagBuilder.add_input(precision, slice(shape));
auto opI =
dagBuilder.add_input(precision, slice(shape),
*loc_to_location(arg.getParentRegion()->getLoc()));
index[arg] = opI;
return opI;
}
@@ -190,6 +197,11 @@ struct FunctionToDag {
} else if (auto matmulEintEint = asMatmulEintEint(op)) {
addEncMatMulTensor(matmulEintEint, encrypted_inputs, precision);
return;
} else if (auto zero = asZeroNoise(op)) {
// special case, as zero ops are rewritten into several optimizer nodes
index = addZeroNoise(zero);
} else if (auto additive = asAdditiveNoise(op)) {
index = addAdditiveNoise(additive, encrypted_inputs);
} else {
index = addLevelledOp(op, encrypted_inputs);
}
@@ -201,6 +213,7 @@ struct FunctionToDag {
void addLut(mlir::Operation &op, FHE::FheIntegerInterface inputType,
Inputs &encrypted_inputs, int precision) {
auto val = op.getResult(0);
auto loc = loc_to_location(op.getLoc());
assert(encrypted_inputs.size() == 1);
// No need to distinguish different lut kind until we do approximate
// paradigm on outputs
@@ -209,13 +222,14 @@ struct FunctionToDag {
std::vector<int32_t> operatorIndexes;
if (inputType.isSigned()) {
// std::vector<std::int64_t> weights_vector{1};
auto addIndex = dagBuilder.add_dot(
slice(encrypted_inputs), concrete_optimizer::weights::number(1));
auto addIndex =
dagBuilder.add_dot(slice(encrypted_inputs),
concrete_optimizer::weights::number(1), *loc);
encrypted_input = addIndex;
operatorIndexes.push_back(addIndex.index);
}
auto lutIndex =
dagBuilder.add_lut(encrypted_input, slice(unknowFunction), precision);
auto lutIndex = dagBuilder.add_lut(encrypted_input, slice(unknowFunction),
precision, *loc);
operatorIndexes.push_back(lutIndex.index);
mlir::Builder builder(op.getContext());
if (setOptimizerID)
@@ -229,7 +243,8 @@ struct FunctionToDag {
// No need to distinguish different lut kind until we do approximate
// paradigm on outputs
auto encrypted_input = encrypted_inputs[0];
index[val] = dagBuilder.add_round_op(encrypted_input, rounded_precision);
index[val] = dagBuilder.add_round_op(encrypted_input, rounded_precision,
*loc_to_location(val.getLoc()));
return index[val];
}
@@ -238,7 +253,8 @@ struct FunctionToDag {
int new_precision) {
assert(encrypted_inputs.size() == 1);
auto encrypted_input = encrypted_inputs[0];
index[val] = dagBuilder.add_unsafe_cast_op(encrypted_input, new_precision);
index[val] = dagBuilder.add_unsafe_cast_op(encrypted_input, new_precision,
*loc_to_location(val.getLoc()));
return index[val];
}
@@ -247,8 +263,8 @@ struct FunctionToDag {
std::vector<std::int64_t> &weights_vector) {
assert(encrypted_inputs.size() == 1);
auto weights = concrete_optimizer::weights::vector(slice(weights_vector));
index[val] =
dagBuilder.add_dot(slice(encrypted_inputs), std::move(weights));
index[val] = dagBuilder.add_dot(slice(encrypted_inputs), std::move(weights),
*loc_to_location(val.getLoc()));
return index[val];
}
@@ -256,32 +272,110 @@ struct FunctionToDag {
std::string loc;
llvm::raw_string_ostream loc_stream(loc);
location.print(loc_stream);
loc.erase(0, 4); // erase the loc( prefix
loc.pop_back(); // erase the ) postfix
loc.erase(std::remove(loc.begin(), loc.end(), '"'),
loc.end()); // erase the " characters
return loc;
}
concrete_optimizer::dag::OperatorIndex
addZeroNoise(concretelang::FHE::ZeroNoise &op) {
auto val = op->getOpResult(0);
auto outShape = getShape(val);
auto loc = loc_to_location(op.getLoc());
// Trivial encrypted constants encoding
// These are converted to input + levelled op
auto precision = fhe::utils::getEintPrecision(val);
auto opI = dagBuilder.add_input(precision, slice(outShape), *loc);
auto inputs = Inputs{opI};
// Default complexity is negligible
double const fixedCost = NEGLIGIBLE_COMPLEXITY;
double const lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;
auto comment = std::string(op->getName().getStringRef()) + " " +
loc_to_string(op.getLoc());
auto weights = std::vector<double>{1.};
index[val] = dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor,
fixedCost, slice(weights),
slice(outShape), comment, *loc);
return index[val];
}
concrete_optimizer::dag::OperatorIndex
addAdditiveNoise(concretelang::FHE::AdditiveNoise &op, Inputs &inputs) {
auto val = op->getResult(0);
auto out_shape = getShape(val);
// Default complexity is negligible
double fixed_cost = NEGLIGIBLE_COMPLEXITY;
double lwe_dim_cost_factor = NEGLIGIBLE_COMPLEXITY;
auto comment = std::string(op->getName().getStringRef()) + " " +
loc_to_string(op.getLoc());
auto loc = loc_to_location(op.getLoc());
auto weights = std::vector<double>(inputs.size(), 1.);
index[val] = dagBuilder.add_levelled_op(slice(inputs), lwe_dim_cost_factor,
fixed_cost, slice(weights),
slice(out_shape), comment, *loc);
return index[val];
}
rust::Box<concrete_optimizer::Location>
loc_to_location(mlir::Location location) {
return location_from_string(loc_to_string(location));
}
concrete_optimizer::dag::OperatorIndex addLevelledOp(mlir::Operation &op,
Inputs &inputs) {
auto val = op.getResult(0);
auto out_shape = getShape(val);
auto loc = loc_to_location(op.getLoc());
if (inputs.empty()) {
// Trivial encrypted constants encoding
// These are converted to input + levelled op
auto precision = fhe::utils::getEintPrecision(val);
auto opI = dagBuilder.add_input(precision, slice(out_shape));
auto opI = dagBuilder.add_input(precision, slice(out_shape), *loc);
inputs.push_back(opI);
}
// Default complexity is negligible
double fixed_cost = NEGLIGIBLE_COMPLEXITY;
double lwe_dim_cost_factor = NEGLIGIBLE_COMPLEXITY;
auto smanp_int = op.getAttrOfType<mlir::IntegerAttr>("SMANP");
auto loc = loc_to_string(op.getLoc());
assert(smanp_int && "Missing manp value on a crypto operation");
// TODO: use APIFloat.sqrt when it's available
double manp = sqrt(smanp_int.getValue().roundToDouble());
auto comment = std::string(op.getName().getStringRef()) + " " + loc;
index[val] =
dagBuilder.add_levelled_op(slice(inputs), lwe_dim_cost_factor,
fixed_cost, manp, slice(out_shape), comment);
auto comment = std::string(op.getName().getStringRef()) + " " +
loc_to_string(op.getLoc());
size_t n_inputs = 0;
double sq_sum = 0;
for (auto input : op.getOperands()) {
if (!fhe::utils::isEncryptedValue(input)) {
continue;
}
n_inputs += 1;
if (input.isa<mlir::BlockArgument>()) {
sq_sum += 1.0;
} else {
auto inpSmanpInt =
input.getDefiningOp()->getAttrOfType<mlir::IntegerAttr>("SMANP");
const double inpSManp = inpSmanpInt.getValue().roundToDouble();
sq_sum += inpSManp;
}
}
assert(inputs.size() == n_inputs);
double weight;
if (sq_sum == 0) {
// The max input manp is zero, meaning the inputs are all zero tensors
// with no noise. In this case the weight does not matter since it will
// multiply zero.
weight = 1.;
} else {
double smanp_dbl = smanp_int.getValue().roundToDouble();
weight = std::max(sqrt(smanp_dbl / sq_sum), 1.0);
assert(!std::isnan(weight));
}
auto weights = std::vector<double>(n_inputs, weight);
index[val] = dagBuilder.add_levelled_op(slice(inputs), lwe_dim_cost_factor,
fixed_cost, slice(weights),
slice(out_shape), comment, *loc);
return index[val];
}
@@ -297,17 +391,19 @@ struct FunctionToDag {
assert(encrypted_inputs.size() == 1);
auto input = lsbOp.getInput();
auto result = lsbOp.getResult();
auto loc = loc_to_location(lsbOp.getLoc());
auto input_precision = fhe::utils::getEintPrecision(input);
auto output_precision = fhe::utils::getEintPrecision(result);
auto lsb_shiffted_as_1bit_wop = dagBuilder.add_dot(
slice(encrypted_inputs),
concrete_optimizer::weights::number(1 << input_precision));
concrete_optimizer::weights::number(1 << input_precision), *loc);
std::vector<std::uint64_t> unknownFunction;
auto overflow_bit_precision = 0;
auto lsb_as_0_bits = dagBuilder.add_unsafe_cast_op(
lsb_shiffted_as_1bit_wop, overflow_bit_precision); // id for rotation
auto lsb_as_0_bits = dagBuilder.add_unsafe_cast_op(lsb_shiffted_as_1bit_wop,
overflow_bit_precision,
*loc); // id for rotation
auto lsb_result = dagBuilder.add_lut(lsb_as_0_bits, slice(unknownFunction),
output_precision);
output_precision, *loc);
auto lsb_result_corrected = idPlaceholder(lsb_result);
index[result] = lsb_result_corrected;
@@ -336,46 +432,23 @@ struct FunctionToDag {
mlir::Value result = mulOp.getResult();
const std::vector<uint64_t> resultShape = getShape(result);
Operation *xOp = mulOp.getLhs().getDefiningOp();
Operation *yOp = mulOp.getRhs().getDefiningOp();
const double fixedCost = NEGLIGIBLE_COMPLEXITY;
const double lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;
llvm::APInt xSmanp = llvm::APInt{1, 1, false};
if (xOp != nullptr) {
const auto xSmanpAttr = xOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(xSmanpAttr && "Missing SMANP value on a crypto operation");
xSmanp = xSmanpAttr.getValue();
}
llvm::APInt ySmanp = llvm::APInt{1, 1, false};
if (yOp != nullptr) {
const auto ySmanpAttr = yOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(ySmanpAttr && "Missing SMANP value on a crypto operation");
ySmanp = ySmanpAttr.getValue();
}
auto loc = loc_to_string(mulOp.getLoc());
auto comment = std::string(mulOp->getName().getStringRef()) + " " + loc;
// (x + y) and (x - y)
const double addSubManp =
sqrt(xSmanp.roundToDouble() + ySmanp.roundToDouble());
// tlu(v)
const double tluManp = 1;
// tlu(v1) - tlu(v2)
const double tluSubManp = sqrt(tluManp + tluManp);
auto comment = std::string(mulOp->getName().getStringRef()) + " " +
loc_to_string(mulOp.getLoc());
auto loc = loc_to_location(mulOp.getLoc());
// for tlus
const std::vector<std::uint64_t> unknownFunction;
// tlu(x + y)
auto addNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
addSubManp, slice(resultShape), comment);
auto addWeights = std::vector<double>{1, 1};
auto addNode = dagBuilder.add_levelled_op(
slice(inputs), lweDimCostFactor, fixedCost, slice(addWeights),
slice(resultShape), comment, *loc);
std::optional<concrete_optimizer::dag::OperatorIndex> lhsCorrectionNode;
if (isSignedEint(mulOp.getType())) {
// If signed mul we need to add the addition node for correction of the
@@ -383,31 +456,37 @@ struct FunctionToDag {
addNode = dagBuilder.add_dot(
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{addNode}),
concrete_optimizer::weights::vector(
slice(std::vector<std::int64_t>{1})));
slice(std::vector<std::int64_t>{1})),
*loc);
lhsCorrectionNode = addNode;
}
auto lhsTluNode =
dagBuilder.add_lut(addNode, slice(unknownFunction), precision);
dagBuilder.add_lut(addNode, slice(unknownFunction), precision, *loc);
// tlu(x - y)
auto subNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
addSubManp, slice(resultShape), comment);
auto subWeights = std::vector<double>{1, 1};
auto subNode = dagBuilder.add_levelled_op(
slice(inputs), lweDimCostFactor, fixedCost, slice(subWeights),
slice(resultShape), comment, *loc);
// This is a signed tlu so we need to also add the addition for correction
// signed tlu
auto rhsCorrectionNode = dagBuilder.add_dot(
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{subNode}),
concrete_optimizer::weights::vector(
slice(std::vector<std::int64_t>{1})));
auto rhsTluNode = dagBuilder.add_lut(rhsCorrectionNode,
slice(unknownFunction), precision);
slice(std::vector<std::int64_t>{1})),
*loc);
auto rhsTluNode = dagBuilder.add_lut(
rhsCorrectionNode, slice(unknownFunction), precision, *loc);
// tlu(x + y) - tlu(x - y)
auto resultWeights = std::vector<double>{1, 1};
const std::vector<concrete_optimizer::dag::OperatorIndex> subInputs = {
lhsTluNode, rhsTluNode};
auto resultNode = dagBuilder.add_levelled_op(
slice(subInputs), lweDimCostFactor, fixedCost, tluSubManp,
slice(resultShape), comment);
slice(subInputs), lweDimCostFactor, fixedCost, slice(resultWeights),
slice(resultShape), comment, *loc);
index[result] = resultNode;
mlir::Builder builder(mulOp.getContext());
@@ -430,6 +509,7 @@ struct FunctionToDag {
addTensorInnerProductEncEnc(InnerProductOp &innerProductOp, Inputs &inputs,
int precision) {
mlir::Value result = innerProductOp.getResult();
auto loc = loc_to_location(innerProductOp.getLoc());
const std::vector<uint64_t> resultShape = getShape(result);
// We assume a first tensorized matmul step
@@ -512,34 +592,11 @@ struct FunctionToDag {
// 1. (x + y) and (x - y) -> supposing broadcasting is used
// to tensorize this operation
Operation *xOp = innerProductOp.getLhs().getDefiningOp();
Operation *yOp = innerProductOp.getRhs().getDefiningOp();
const double fixedCost = NEGLIGIBLE_COMPLEXITY;
const double lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;
llvm::APInt xSmanp = llvm::APInt{1, 1, false};
if (xOp != nullptr) {
const auto xSmanpAttr = xOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(xSmanpAttr && "Missing SMANP value on a crypto operation");
xSmanp = xSmanpAttr.getValue();
}
llvm::APInt ySmanp = llvm::APInt{1, 1, false};
if (yOp != nullptr) {
const auto ySmanpAttr = yOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(ySmanpAttr && "Missing SMANP value on a crypto operation");
ySmanp = ySmanpAttr.getValue();
}
auto loc = loc_to_string(innerProductOp.getLoc());
auto comment =
std::string(innerProductOp->getName().getStringRef()) + " " + loc;
// (x + y) and (x - y)
const double addSubManp =
sqrt(xSmanp.roundToDouble() + ySmanp.roundToDouble());
auto comment = std::string(innerProductOp->getName().getStringRef()) + " " +
loc_to_string(innerProductOp.getLoc());
// tlu(v)
const double tluManp = 1;
@@ -551,9 +608,11 @@ struct FunctionToDag {
const std::vector<std::uint64_t> unknownFunction;
// tlu(x + y)
auto addNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
addSubManp, slice(pairMatrixShape), comment);
auto addWeights = std::vector<double>{1, 1};
auto addNode = dagBuilder.add_levelled_op(
slice(inputs), lweDimCostFactor, fixedCost, slice(addWeights),
slice(pairMatrixShape), comment, *loc);
std::optional<concrete_optimizer::dag::OperatorIndex> lhsCorrectionNode;
if (isSignedEint(innerProductOp.getType())) {
// If signed mul we need to add the addition node for correction of the
@@ -561,31 +620,36 @@ struct FunctionToDag {
addNode = dagBuilder.add_dot(
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{addNode}),
concrete_optimizer::weights::vector(
slice(std::vector<std::int64_t>{1})));
slice(std::vector<std::int64_t>{1})),
*loc);
lhsCorrectionNode = addNode;
}
auto lhsTluNode =
dagBuilder.add_lut(addNode, slice(unknownFunction), precision);
dagBuilder.add_lut(addNode, slice(unknownFunction), precision, *loc);
// tlu(x - y)
auto subNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
addSubManp, slice(pairMatrixShape), comment);
auto subWeights = std::vector<double>{1, 1};
auto subNode = dagBuilder.add_levelled_op(
slice(inputs), lweDimCostFactor, fixedCost, slice(subWeights),
slice(pairMatrixShape), comment, *loc);
// This is a signed tlu so we need to also add the addition for correction
// signed tlu
auto rhsCorrectionNode = dagBuilder.add_dot(
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{subNode}),
concrete_optimizer::weights::vector(
slice(std::vector<std::int64_t>{1})));
auto rhsTluNode = dagBuilder.add_lut(rhsCorrectionNode,
slice(unknownFunction), precision);
slice(std::vector<std::int64_t>{1})),
*loc);
auto rhsTluNode = dagBuilder.add_lut(
rhsCorrectionNode, slice(unknownFunction), precision, *loc);
// tlu(x + y) - tlu(x - y)
auto resultWeights = std::vector<double>{1, 1};
const std::vector<concrete_optimizer::dag::OperatorIndex> subInputs = {
lhsTluNode, rhsTluNode};
auto resultNode = dagBuilder.add_levelled_op(
slice(subInputs), lweDimCostFactor, fixedCost, tluSubManp,
slice(pairMatrixShape), comment);
slice(subInputs), lweDimCostFactor, fixedCost, slice(resultWeights),
slice(pairMatrixShape), comment, *loc);
// 3. Sum(tlu(x + y) - tlu(x - y))
// Create a leveled op that simulates concatenation. It takes
@@ -606,9 +670,10 @@ struct FunctionToDag {
// TODO: use APIFloat.sqrt when it's available
double manp = sqrt(smanp_int.getValue().roundToDouble());
auto weights = std::vector<double>(sumOperands.size(), manp / tluSubManp);
index[result] = dagBuilder.add_levelled_op(
slice(sumOperands), lwe_dim_cost_factor, fixed_cost, manp,
slice(resultShape), comment);
slice(sumOperands), lwe_dim_cost_factor, fixed_cost, slice(weights),
slice(resultShape), comment, *loc);
// Create the TFHE.OId attributes
// The first elements of the vector are nodes for the encrypted
@@ -647,49 +712,31 @@ struct FunctionToDag {
void addMax(FHE::MaxEintOp &maxOp, Inputs &inputs, int precision) {
mlir::Value result = maxOp.getResult();
auto loc = loc_to_location(maxOp.getLoc());
const std::vector<uint64_t> resultShape = getShape(result);
Operation *xOp = maxOp.getX().getDefiningOp();
Operation *yOp = maxOp.getY().getDefiningOp();
const double fixedCost = NEGLIGIBLE_COMPLEXITY;
const double lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;
llvm::APInt xSmanp = llvm::APInt{1, 1, false};
if (xOp != nullptr) {
const auto xSmanpAttr = xOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(xSmanpAttr && "Missing SMANP value on a crypto operation");
xSmanp = xSmanpAttr.getValue();
}
auto comment = std::string(maxOp->getName().getStringRef()) + " " +
loc_to_string(maxOp.getLoc());
llvm::APInt ySmanp = llvm::APInt{1, 1, false};
if (yOp != nullptr) {
const auto ySmanpAttr = yOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(ySmanpAttr && "Missing SMANP value on a crypto operation");
ySmanp = ySmanpAttr.getValue();
}
auto subWeights = std::vector<double>{1, 1};
auto subNode = dagBuilder.add_levelled_op(
slice(inputs), lweDimCostFactor, fixedCost, slice(subWeights),
slice(resultShape), comment, *loc);
const double subManp =
sqrt(xSmanp.roundToDouble() + ySmanp.roundToDouble());
auto loc = loc_to_string(maxOp.getLoc());
auto comment = std::string(maxOp->getName().getStringRef()) + " " + loc;
auto subNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
subManp, slice(resultShape), comment);
const double tluNodeManp = 1;
const std::vector<std::uint64_t> unknownFunction;
auto tluNode =
dagBuilder.add_lut(subNode, slice(unknownFunction), precision);
dagBuilder.add_lut(subNode, slice(unknownFunction), precision, *loc);
const double addManp = sqrt(tluNodeManp + ySmanp.roundToDouble());
const std::vector<concrete_optimizer::dag::OperatorIndex> addInputs = {
tluNode, inputs[1]};
auto addWeights = std::vector<double>{1, 1};
auto resultNode = dagBuilder.add_levelled_op(
slice(addInputs), lweDimCostFactor, fixedCost, addManp,
slice(resultShape), comment);
slice(addInputs), lweDimCostFactor, fixedCost, slice(addWeights),
slice(resultShape), comment, *loc);
index[result] = resultNode;
// Set attribute on the MLIR node
@@ -705,6 +752,7 @@ struct FunctionToDag {
int precision) {
mlir::Value result = maxpool2dOp.getResult();
const std::vector<uint64_t> resultShape = getShape(result);
auto loc = loc_to_location(maxpool2dOp.getLoc());
// all TLUs are flattened into a dimension
// to create a single TLU node in the optimizer DAG
@@ -732,25 +780,29 @@ struct FunctionToDag {
const double subManp = sqrt(2 * inputSmanp.roundToDouble() + 1);
auto loc = loc_to_string(maxpool2dOp.getLoc());
auto comment =
std::string(maxpool2dOp->getName().getStringRef()) + " " + loc;
auto comment = std::string(maxpool2dOp->getName().getStringRef()) + " " +
loc_to_string(maxpool2dOp.getLoc());
auto subNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
subManp, slice(fakeShape), comment);
auto subWeights = std::vector<double>(
inputs.size(), subManp / sqrt(inputSmanp.roundToDouble()));
auto subNode = dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor,
fixedCost, slice(subWeights),
slice(fakeShape), comment, *loc);
const std::vector<std::uint64_t> unknownFunction;
auto tluNode =
dagBuilder.add_lut(subNode, slice(unknownFunction), precision);
dagBuilder.add_lut(subNode, slice(unknownFunction), precision, *loc);
const double addManp = sqrt(inputSmanp.roundToDouble() + 1);
const std::vector<concrete_optimizer::dag::OperatorIndex> addInputs = {
tluNode, inputs[0]};
auto resultWeights = std::vector<double>(
addInputs.size(), addManp / sqrt(inputSmanp.roundToDouble()));
auto resultNode = dagBuilder.add_levelled_op(
slice(addInputs), lweDimCostFactor, fixedCost, addManp,
slice(resultShape), comment);
slice(addInputs), lweDimCostFactor, fixedCost, slice(resultWeights),
slice(resultShape), comment, *loc);
index[result] = resultNode;
// Set attribute on the MLIR node
mlir::Builder builder(maxpool2dOp.getContext());
@@ -771,7 +823,8 @@ struct FunctionToDag {
idPlaceholder(concrete_optimizer::dag::OperatorIndex input) {
std::vector inputs = {input};
return dagBuilder.add_dot(slice(inputs),
concrete_optimizer::weights::number(1));
concrete_optimizer::weights::number(1),
*location_unknown());
}
Inputs encryptedInputs(mlir::Operation &op) {
@@ -852,6 +905,14 @@ struct FunctionToDag {
return llvm::dyn_cast<mlir::concretelang::FHELinalg::MulEintOp>(op);
}
mlir::concretelang::FHE::ZeroNoise asZeroNoise(mlir::Operation &op) {
return llvm::dyn_cast<mlir::concretelang::FHE::ZeroNoise>(op);
}
mlir::concretelang::FHE::AdditiveNoise asAdditiveNoise(mlir::Operation &op) {
return llvm::dyn_cast<mlir::concretelang::FHE::AdditiveNoise>(op);
}
mlir::concretelang::FHE::MaxEintOp asMax(mlir::Operation &op) {
return llvm::dyn_cast<mlir::concretelang::FHE::MaxEintOp>(op);
}

View File

@@ -794,14 +794,12 @@ public:
std::optional<llvm::APInt>
norm2SqEquivFromOp(Operation *op, ArrayRef<const MANPLattice *> operands) {
std::optional<llvm::APInt> norm2SqEquiv;
if (auto cstNoiseOp =
llvm::dyn_cast<mlir::concretelang::FHE::ConstantNoise>(op)) {
if (llvm::isa<mlir::concretelang::FHE::ZeroEintOp,
mlir::concretelang::FHE::ZeroTensorOp>(op)) {
norm2SqEquiv = llvm::APInt{1, 0, false};
} else {
norm2SqEquiv = llvm::APInt{1, 1, false};
}
if (auto zeroNoiseOp =
llvm::dyn_cast<mlir::concretelang::FHE::ZeroNoise>(op)) {
norm2SqEquiv = llvm::APInt{1, 0, false};
} else if (auto cstNoiseOp =
llvm::dyn_cast<mlir::concretelang::FHE::ConstantNoise>(op)) {
norm2SqEquiv = llvm::APInt{1, 1, false};
} else if (llvm::isa<mlir::concretelang::FHE::ToBoolOp>(op) ||
llvm::isa<mlir::concretelang::FHE::FromBoolOp>(op)) {
norm2SqEquiv = getNoOpSqMANP(operands);

View File

@@ -306,19 +306,6 @@ static void lowerDataflowTaskOp(RT::DataflowTaskOp DFTOp,
DFTOp.erase();
}
static void registerWorkFunction(mlir::func::FuncOp parentFunc,
mlir::func::FuncOp workFunction) {
OpBuilder builder(parentFunc.getBody());
builder.setInsertionPointToStart(&parentFunc.getBody().front());
auto fnptr = builder.create<mlir::func::ConstantOp>(
parentFunc.getLoc(), workFunction.getFunctionType(),
SymbolRefAttr::get(builder.getContext(), workFunction.getName()));
builder.create<RT::RegisterTaskWorkFunctionOp>(parentFunc.getLoc(),
fnptr.getResult());
}
static func::FuncOp getCalledFunction(CallOpInterface callOp) {
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
if (!sym)
@@ -333,8 +320,6 @@ struct LowerDataflowTasksPass
void runOnOperation() override {
auto module = getOperation();
SmallVector<func::FuncOp, 4> workFunctions;
SmallVector<func::FuncOp, 1> entryPoints;
module.walk([&](mlir::func::FuncOp func) {
static int wfn_id = 0;
@@ -357,58 +342,12 @@ struct LowerDataflowTasksPass
outliningMap.push_back(
std::pair<RT::DataflowTaskOp, func::FuncOp>(op, outlinedFunc));
symbolTable.insert(outlinedFunc);
workFunctions.push_back(outlinedFunc);
return WalkResult::advance();
});
// Lower the DF task ops to RT dialect ops.
for (auto mapping : outliningMap)
lowerDataflowTaskOp(mapping.first, mapping.second);
// Gather all entry points (assuming no recursive calls to entry points)
// Main is always an entry-point - otherwise check if this
// function is called within the module. TODO: we assume no
// recursion.
if (func.getName() == "main")
entryPoints.push_back(func);
else {
bool found = false;
module.walk([&](mlir::func::CallOp op) {
if (getCalledFunction(op) == func)
found = true;
});
if (!found)
entryPoints.push_back(func);
}
});
for (auto entryPoint : entryPoints) {
// If this is a JIT invocation and we're not on the root node,
// we do not need to do any computation, only register all work
// functions with the runtime system
if (!workFunctions.empty()) {
if (!dfr::_dfr_is_root_node()) {
entryPoint.eraseBody();
Block *b = new Block;
FunctionType funTy = entryPoint.getFunctionType();
SmallVector<Location> locations(funTy.getInputs().size(),
entryPoint.getLoc());
b->addArguments(funTy.getInputs(), locations);
entryPoint.getBody().push_front(b);
for (int i = funTy.getNumInputs() - 1; i >= 0; --i)
entryPoint.eraseArgument(i);
for (int i = funTy.getNumResults() - 1; i >= 0; --i)
entryPoint.eraseResult(i);
OpBuilder builder(entryPoint.getBody());
builder.setInsertionPointToEnd(&entryPoint.getBody().front());
builder.create<mlir::func::ReturnOp>(entryPoint.getLoc());
}
}
// Generate code to register all work-functions with the
// runtime.
for (auto wf : workFunctions)
registerWorkFunction(entryPoint, wf);
}
}
LowerDataflowTasksPass(bool debug) : debug(debug){};
@@ -428,29 +367,27 @@ struct StartStopPass : public StartStopBase<StartStopPass> {
void runOnOperation() override {
auto module = getOperation();
int useDFR = 0;
SmallVector<func::FuncOp, 1> entryPoints;
// Gather all entry points in the module.
module.walk([&](mlir::func::FuncOp func) {
// Do not add start/stop to work functions - but if any are
// present, then we need to activate the runtime
if (func->getAttr("_dfr_work_function_attribute")) {
useDFR = 1;
} else {
// Main is always an entry-point - otherwise check if this
// function is called within the module. TODO: we assume no
// recursion.
if (func.getName() == "main")
// Work functions are never allowed to be an entry point.
if (func->getAttr("_dfr_work_function_attribute"))
return;
// Main is always an entry-point - otherwise check if this
// function is called within the module. TODO: we assume no
// recursion.
if (func.getName() == "main")
entryPoints.push_back(func);
else {
bool found = false;
module.walk([&](mlir::func::CallOp op) {
if (getCalledFunction(op) == func)
found = true;
});
if (!found)
entryPoints.push_back(func);
else {
bool found = false;
module.walk([&](mlir::func::CallOp op) {
if (getCalledFunction(op) == func)
found = true;
});
if (!found)
entryPoints.push_back(func);
}
}
});
@@ -459,7 +396,7 @@ struct StartStopPass : public StartStopBase<StartStopPass> {
OpBuilder builder(entryPoint.getBody());
builder.setInsertionPointToStart(&entryPoint.getBody().front());
Value useDFRVal = builder.create<arith::ConstantOp>(
entryPoint.getLoc(), builder.getI64IntegerAttr(useDFR));
entryPoint.getLoc(), builder.getI64IntegerAttr(1));
// Check if this entry point uses a context
Value ctx = nullptr;

View File

@@ -2,12 +2,14 @@ add_mlir_dialect_library(
ConcretelangSDFGTransforms
BufferizableOpInterfaceImpl.cpp
SDFGConvertibleOpInterfaceImpl.cpp
SDFGBufferOwnership.cpp
ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/concretelang/Dialect/Concrete
${PROJECT_SOURCE_DIR}/include/concretelang/Dialect/SDFG
DEPENDS
mlir-headers
SDFGDialect
ConcretelangSDFGTransformsPassIncGen
ConcretelangSDFGInterfaces
LINK_LIBS
PUBLIC

View File

@@ -0,0 +1,112 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Operation.h"
#include "concretelang/Conversion/Tools.h"
#include "concretelang/Dialect/Concrete/IR/ConcreteDialect.h"
#include "concretelang/Dialect/Concrete/IR/ConcreteOps.h"
#include "concretelang/Dialect/SDFG/IR/SDFGDialect.h"
#include "concretelang/Dialect/SDFG/IR/SDFGOps.h"
#include "concretelang/Dialect/SDFG/IR/SDFGTypes.h"
#include "concretelang/Dialect/SDFG/Transforms/BufferizableOpInterfaceImpl.h"
#include "concretelang/Support/CompilerEngine.h"
#include <concretelang/Dialect/SDFG/Transforms/Passes.h>
#include <mlir/IR/AffineExpr.h>
#include <mlir/IR/AffineMap.h>
#include <mlir/IR/BuiltinTypes.h>
using namespace mlir;
using namespace mlir::bufferization;
using namespace mlir::tensor;
namespace SDFG = mlir::concretelang::SDFG;
namespace mlir {
namespace concretelang {
namespace {
static void getAliasedUses(Value val, DenseSet<OpOperand *> &aliasedUses) {
for (auto &use : val.getUses()) {
aliasedUses.insert(&use);
if (dyn_cast<ViewLikeOpInterface>(use.getOwner()))
getAliasedUses(use.getOwner()->getResult(0), aliasedUses);
}
}
static func::FuncOp getCalledFunction(CallOpInterface callOp) {
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
if (!sym)
return nullptr;
return dyn_cast_or_null<func::FuncOp>(
SymbolTable::lookupNearestSymbolFrom(callOp, sym));
}
struct SDFGBufferOwnershipPass
: public SDFGBufferOwnershipBase<SDFGBufferOwnershipPass> {
void runOnOperation() override {
auto module = getOperation();
std::vector<Operation *> deallocOps;
    // Find all SDFG put operations whose buffer is used only by that
    // operation and then deallocated. In such cases there is no need
    // to copy the data again in the runtime: we can take ownership of
    // the buffer instead, removing the deallocation and letting the
    // runtime deallocate when appropriate.
module.walk([&](mlir::memref::DeallocOp op) {
DominanceInfo domInfo(op);
Value alloc = op.getOperand();
DenseSet<OpOperand *> aliasedUses;
getAliasedUses(alloc, aliasedUses);
// Check if this memref is used in a SDFG put operation
for (auto use : aliasedUses) {
if (isa<mlir::func::CallOp>(use->getOwner())) {
mlir::func::CallOp callOp = cast<func::CallOp>(use->getOwner());
mlir::func::FuncOp funcOp = getCalledFunction(callOp);
std::string putName = "stream_emulator_put_memref";
if (funcOp.getName().str().compare(0, putName.size(), putName) == 0) {
// If the put operation dominates the deallocation, then
// ownership of the data can be transferred to the runtime
// and deallocation can be removed. We mark the ownership
// flag in the PUT operation to notify the runtime that it
// gets ownership.
if (domInfo.properlyDominates(callOp, op)) {
deallocOps.push_back(op);
OpBuilder builder(callOp);
mlir::Value cst1 = builder.create<mlir::arith::ConstantOp>(
callOp.getLoc(), builder.getI64IntegerAttr(1));
callOp->setOperand(2, cst1);
}
return;
}
}
}
});
for (auto dop : deallocOps) {
dop->erase();
}
}
};
} // end anonymous namespace
std::unique_ptr<mlir::Pass> createSDFGBufferOwnershipPass() {
return std::make_unique<SDFGBufferOwnershipPass>();
}
} // end namespace concretelang
} // end namespace mlir
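A minimal sketch of the ownership contract this pass establishes, using a hypothetical helper rather than the actual runtime code: the data_ownership flag that the pass sets on the put call tells the runtime whether it may keep the caller's buffer instead of making its own copy (the runtime-side counterpart appears later in this diff in stream_emulator_put_memref).

// Sketch only (hypothetical helper name, not the actual runtime code).
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>

uint64_t *acquire_put_buffer(uint64_t *aligned, size_t bytes,
                             uint64_t data_ownership) {
  if (data_ownership)
    return aligned; // the pass removed the dealloc; the runtime frees it later
  uint64_t *copy = static_cast<uint64_t *>(std::malloc(bytes));
  std::memcpy(copy, aligned, bytes); // caller keeps its buffer; runtime owns the copy
  return copy;
}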

View File

@@ -997,6 +997,9 @@ public:
return mlir::failure();
}
if (oldOp->getAttr("_dfr_work_function_attribute"))
newOp->setAttr("_dfr_work_function_attribute", rewriter.getUnitAttr());
return mlir::success();
}

View File

@@ -1,12 +1,28 @@
add_compile_options(-fsized-deallocation)
if(CONCRETELANG_CUDA_SUPPORT)
add_library(ConcretelangRuntime SHARED context.cpp simulation.cpp wrappers.cpp DFRuntime.cpp key_manager.cpp
GPUDFG.cpp)
add_library(
ConcretelangRuntime SHARED
context.cpp
utils.cpp
simulation.cpp
wrappers.cpp
DFRuntime.cpp
key_manager.cpp
GPUDFG.cpp
time_util.cpp)
target_link_libraries(ConcretelangRuntime PRIVATE hwloc)
else()
add_library(ConcretelangRuntime SHARED context.cpp simulation.cpp wrappers.cpp DFRuntime.cpp key_manager.cpp
StreamEmulator.cpp)
add_library(
ConcretelangRuntime SHARED
context.cpp
utils.cpp
simulation.cpp
wrappers.cpp
DFRuntime.cpp
key_manager.cpp
GPUDFG.cpp
time_util.cpp)
endif()
add_dependencies(ConcretelangRuntime concrete_cpu concrete_cpu_noise_model concrete-protocol)

View File

@@ -36,6 +36,10 @@ static size_t num_nodes = 0;
static struct timespec init_timer, broadcast_timer, compute_timer, whole_timer;
#endif
} // namespace
void *dl_handle = nullptr;
WorkFunctionRegistry *_dfr_node_level_work_function_registry;
} // namespace dfr
} // namespace concretelang
} // namespace mlir
@@ -99,7 +103,6 @@ void _dfr_create_async_task(wfnptr wfn, void *ctx, size_t num_params,
param_types.push_back(va_arg(args, uint64_t));
}
va_end(args);
dfr_create_async_task_impl(wfn, ctx, refcounted_futures, param_sizes,
param_types, outputs, output_sizes, output_types);
}
@@ -179,6 +182,7 @@ bool _dfr_is_jit() { return is_jit_p; }
bool _dfr_is_root_node() { return is_root_node_p; }
bool _dfr_use_omp() { return use_omp_p; }
bool _dfr_is_distributed() { return num_nodes > 1; }
void _dfr_register_lib(void *dlh) { dl_handle = dlh; }
} // namespace dfr
} // namespace concretelang
} // namespace mlir
@@ -210,8 +214,10 @@ static inline void _dfr_stop_impl() {
}
static inline void _dfr_start_impl(int argc, char *argv[]) {
CONCRETELANG_ENABLE_TIMING();
BEGIN_TIME(&init_timer);
dl_handle = dlopen(nullptr, RTLD_NOW);
if (dl_handle == nullptr)
dl_handle = dlopen(nullptr, RTLD_NOW);
// If OpenMP is to be used, we need to force its initialization
// before thread binding occurs. Otherwise OMP threads will be bound
@@ -315,7 +321,7 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
is_root_node_p = (hpx::find_here() == hpx::find_root_locality());
num_nodes = hpx::get_num_localities().get();
new WorkFunctionRegistry();
_dfr_node_level_work_function_registry = new WorkFunctionRegistry();
char *env = getenv("DFR_LAZY_KEY_TRANSFER");
bool lazy = false;
@@ -324,7 +330,7 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
!strncmp(env, "On", 2) || !strncmp(env, "on", 2) ||
!strncmp(env, "1", 1))
lazy = true;
new RuntimeContextManager(lazy);
_dfr_node_level_runtime_context_manager = new RuntimeContextManager(lazy);
_dfr_jit_phase_barrier = new hpx::distributed::barrier(
"phase_barrier", num_nodes, hpx::get_locality_id());
@@ -346,6 +352,7 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
JIT invocation). These serve to pause/resume the runtime
scheduler and to clean up used resources. */
void _dfr_start(int64_t use_dfr_p, void *ctx) {
CONCRETELANG_ENABLE_TIMING();
BEGIN_TIME(&whole_timer);
if (use_dfr_p) {
// The first invocation will initialise the runtime. As each call to
@@ -365,12 +372,10 @@ void _dfr_start(int64_t use_dfr_p, void *ctx) {
if (num_nodes > 1) {
BEGIN_TIME(&broadcast_timer);
_dfr_node_level_runtime_context_manager->setContext(ctx);
}
if (_dfr_is_root_node())
_dfr_startup_barrier->wait();
if (num_nodes > 1 && ctx) {
END_TIME(&broadcast_timer, "Key broadcasting");
if (ctx) {
END_TIME(&broadcast_timer, "Key broadcasting");
}
}
}
BEGIN_TIME(&compute_timer);
@@ -383,24 +388,29 @@ void _dfr_start(int64_t use_dfr_p, void *ctx) {
void _dfr_stop(int64_t use_dfr_p) {
if (use_dfr_p) {
if (num_nodes > 1) {
// Non-root nodes synchronize here with the root to mark the point
// where the root is free to send work out (only needed in JIT).
if (!_dfr_is_root_node())
_dfr_startup_barrier->wait();
// The barrier is only needed to synchronize the different
// computation phases when the compute nodes need to generate and
// register new work functions in each phase.
_dfr_jit_phase_barrier->wait();
_dfr_node_level_runtime_context_manager->clearContext();
_dfr_node_level_work_function_registry->clearRegistry();
_dfr_jit_phase_barrier->wait();
}
}
END_TIME(&compute_timer, "Compute");
END_TIME(&whole_timer, "Total execution");
}
namespace mlir {
namespace concretelang {
namespace dfr {
void _dfr_run_remote_scheduler() {
_dfr_start(1, nullptr);
_dfr_stop(1);
}
} // namespace dfr
} // namespace concretelang
} // namespace mlir
void _dfr_try_initialize() {
// Initialize and immediately suspend the HPX runtime if not yet done.
uint64_t expected = uninitialised;
@@ -484,6 +494,8 @@ bool _dfr_is_jit() { return is_jit_p; }
bool _dfr_is_root_node() { return true; }
bool _dfr_use_omp() { return use_omp_p; }
bool _dfr_is_distributed() { return num_nodes > 1; }
void _dfr_run_remote_scheduler() {}
void _dfr_register_lib(void *dlh) {}
} // namespace dfr
} // namespace concretelang
@@ -491,8 +503,8 @@ bool _dfr_is_distributed() { return num_nodes > 1; }
using namespace mlir::concretelang::dfr;
void _dfr_start(int64_t use_dfr_p, void *ctx) { BEGIN_TIME(&compute_timer); }
void _dfr_stop(int64_t use_dfr_p) { END_TIME(&compute_timer, "Compute"); }
void _dfr_start(int64_t use_dfr_p, void *ctx) {}
void _dfr_stop(int64_t use_dfr_p) {}
void _dfr_terminate() {}
#endif

View File

@@ -3,6 +3,7 @@
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#ifdef CONCRETELANG_CUDA_SUPPORT
#include <atomic>
#include <cmath>
#include <cstdarg>
@@ -18,15 +19,11 @@
#include <utility>
#include <vector>
#include <concretelang/Runtime/GPUDFG.hpp>
#include <concretelang/Runtime/stream_emulator_api.h>
#include <concretelang/Runtime/time_util.h>
#include <concretelang/Runtime/wrappers.h>
#ifdef CONCRETELANG_CUDA_SUPPORT
#include "bootstrap.h"
#include "device.h"
#include "keyswitch.h"
#include "linear_algebra.h"
using RuntimeContext = mlir::concretelang::RuntimeContext;
namespace mlir {
@@ -34,6 +31,10 @@ namespace concretelang {
namespace gpu_dfg {
namespace {
#if CONCRETELANG_TIMING_ENABLED
static struct timespec init_timer, blocking_get_timer, acc1, acc2;
#endif
using MemRef2 = MemRefDescriptor<2>;
// When not using all accelerators on the machine, we distribute work
@@ -93,6 +94,7 @@ union Context {
// across multiple locations.
static const int32_t host_location = -1;
static const int32_t split_location = -2;
static const int32_t invalid_location = -3;
// Similarly dependence chunks are either indexed (which does not
// always correlate to the device index on which they are located) or
// this dependence is split further.
@@ -216,17 +218,12 @@ struct GPU_DFG {
polynomial_size,
input_lwe_ciphertext_count);
}
void free_streams() {
streams.sort();
streams.unique();
for (auto s : streams)
delete s;
}
inline void *get_gpu_stream(int32_t loc) {
if (loc < 0)
return nullptr;
return gpus[loc].get_gpu_stream();
}
void free_streams();
private:
std::list<void *> to_free_list;
@@ -253,6 +250,7 @@ struct Dependence {
int32_t chunk_id;
size_t stream_generation;
std::vector<Dependence *> chunks;
std::vector<size_t> chunking_schedule;
Dependence(int32_t l, MemRef2 hd, void *dd, bool ohr, bool alloc = false,
int32_t chunk_id = single_chunk, size_t gen = 0)
: location(l), host_data(hd), device_data(dd), onHostReady(ohr),
@@ -266,17 +264,41 @@ struct Dependence {
// Split a dependence into a number of chunks either to run on
// multiple GPUs or execute concurrently on the host.
void split_dependence(size_t num_chunks, size_t num_gpu_chunks,
size_t chunk_dim, bool constant) {
size_t chunk_dim, bool constant,
size_t gpu_chunk_factor, GPU_DFG *dfg) {
// If this dependence is already split, check that the split
// matches the new request
if (chunk_id == split_chunks) {
if (num_chunks + num_gpu_chunks != chunks.size())
warnx("WARNING: requesting to split dependence across different number "
"of chunks (%lu) than it already is split (%lu) which would "
"require remapping. This is not supported.",
num_chunks + num_gpu_chunks, chunks.size());
assert(num_chunks + num_gpu_chunks == chunks.size());
return;
if (num_chunks + num_gpu_chunks != chunks.size()) {
// If this is not available on host, then we need to merge on
// host and re-split
if (!onHostReady) {
size_t data_size = 0;
size_t num_samples = 0;
for (auto c : chunks) {
move_chunk_off_device(c->chunk_id, dfg);
data_size += memref_get_data_size(c->host_data);
num_samples += c->host_data.sizes[chunk_dim];
sdfg_gpu_debug_print_mref("Chunk", c->host_data);
}
host_data = chunks[0]->host_data;
host_data.allocated = host_data.aligned =
(uint64_t *)malloc(data_size);
host_data.sizes[chunk_dim] = num_samples;
size_t pos = 0;
for (auto c : chunks) {
memcpy(((char *)host_data.aligned) + pos, c->host_data.aligned,
memref_get_data_size(c->host_data));
pos += memref_get_data_size(c->host_data);
}
for (auto c : chunks)
free_chunk_host_data(c->chunk_id, dfg);
onHostReady = true;
hostAllocated = true;
}
} else {
return;
}
}
if (!chunks.empty()) {
for (auto c : chunks)
@@ -301,8 +323,8 @@ struct Dependence {
return;
}
size_t chunk_size =
num_samples / (num_chunks + num_gpu_chunks * device_compute_factor);
size_t gpu_chunk_size = chunk_size * device_compute_factor;
num_samples / (num_chunks + num_gpu_chunks * gpu_chunk_factor);
size_t gpu_chunk_size = chunk_size * gpu_chunk_factor;
chunk_size = (num_samples - gpu_chunk_size * num_gpu_chunks) / num_chunks;
size_t chunk_remainder =
(num_samples - gpu_chunk_size * num_gpu_chunks) % num_chunks;
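To make the split arithmetic above concrete, here is a worked example with hypothetical numbers; the variable names mirror the code, the values do not come from the project.

// Worked example: 1000 samples split over 8 host chunks and 2 GPU chunks,
// with gpu_chunk_factor = 8 (one GPU chunk is worth 8 host chunks of work).
#include <cstddef>

void chunking_example() {
  size_t num_samples = 1000, num_chunks = 8, num_gpu_chunks = 2,
         gpu_chunk_factor = 8;
  size_t chunk_size =
      num_samples / (num_chunks + num_gpu_chunks * gpu_chunk_factor); // 1000 / 24 = 41
  size_t gpu_chunk_size = chunk_size * gpu_chunk_factor;              // 328 samples per GPU chunk
  chunk_size =
      (num_samples - gpu_chunk_size * num_gpu_chunks) / num_chunks;   // 344 / 8 = 43 per host chunk
  size_t chunk_remainder =
      (num_samples - gpu_chunk_size * num_gpu_chunks) % num_chunks;   // 0 samples left over
  (void)chunk_remainder;
}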
@@ -331,62 +353,43 @@ struct Dependence {
chunk_id = split_chunks;
location = split_location;
}
void merge_dependence(GPU_DFG *dfg) {
void finalize_merged_dependence(GPU_DFG *dfg) {
assert(!chunks.empty() && "Cannot merge dependence with no chunks");
size_t data_size = 0;
size_t num_samples = 0;
for (auto c : chunks) {
data_size += memref_get_data_size(c->host_data);
num_samples += c->host_data.sizes[0];
}
uint64_t *data = (uint64_t *)malloc(data_size);
MemRef2 output = {data,
data,
0,
{num_samples, chunks.front()->host_data.sizes[1]},
{chunks.front()->host_data.sizes[1], 1}};
std::list<cudaStream_t *> custreams_used;
for (auto c : chunks) {
// Write out the piece in the final target dependence
size_t csize = memref_get_data_size(c->host_data);
if (c->onHostReady) {
memcpy(((char *)output.aligned) + output.offset, c->host_data.aligned,
csize);
} else {
assert(c->location > host_location);
cudaStream_t *s = (cudaStream_t *)dfg->get_gpu_stream(c->location);
cuda_memcpy_async_to_cpu(((char *)output.aligned) + output.offset,
c->device_data, csize, s, c->location);
custreams_used.push_back(s);
}
output.offset += csize;
}
output.offset = 0;
for (auto c : chunks)
c->free_data(dfg, true);
assert(host_data.allocated != nullptr);
chunks.clear();
custreams_used.sort();
custreams_used.unique();
for (auto s : custreams_used)
cudaStreamSynchronize(*s);
location = host_location;
onHostReady = true;
assert(host_data.allocated == nullptr);
host_data = output;
assert(device_data == nullptr);
hostAllocated = true;
chunk_id = single_chunk;
}
void copy_chunk_off_device(int32_t chunk_id, GPU_DFG *dfg) {
if (chunks[chunk_id]->onHostReady)
return;
chunks[chunk_id]->copy(host_location, dfg, false);
}
void move_chunk_off_device(int32_t chunk_id, GPU_DFG *dfg) {
chunks[chunk_id]->copy(host_location, dfg);
copy_chunk_off_device(chunk_id, dfg);
chunks[chunk_id]->location = host_location;
if (chunks[chunk_id]->device_data == nullptr)
return;
cuda_drop_async(
chunks[chunk_id]->device_data,
(cudaStream_t *)dfg->get_gpu_stream(chunks[chunk_id]->location),
chunks[chunk_id]->location);
chunks[chunk_id]->location = host_location;
chunks[chunk_id]->device_data = nullptr;
}
void merge_output_off_device(int32_t chunk_id, GPU_DFG *dfg) {
assert(chunks[chunk_id]->location > host_location);
size_t data_offset = 0;
for (int32_t c = 0; c < chunk_id; ++c)
data_offset +=
chunking_schedule[c] * host_data.sizes[1] * sizeof(uint64_t);
size_t csize = memref_get_data_size(chunks[chunk_id]->host_data);
cudaStream_t *s =
(cudaStream_t *)dfg->get_gpu_stream(chunks[chunk_id]->location);
cuda_memcpy_async_to_cpu(((char *)host_data.aligned) + data_offset,
chunks[chunk_id]->device_data, csize, s,
chunks[chunk_id]->location);
}
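As a worked example of the offset computation above, with made-up numbers: the chunking_schedule records how many samples each chunk holds, so the byte offset of a chunk in the merged output is the combined size of all preceding chunks.

// Illustrative only: schedule {43, 43, 43, 328} samples per chunk and
// 1025 u64 words per sample (host_data.sizes[1] in the code above).
#include <cstddef>
#include <cstdint>

size_t merged_offset_example() {
  size_t chunking_schedule[] = {43, 43, 43, 328};
  size_t row_words = 1025;
  size_t data_offset = 0;
  for (int c = 0; c < 2; ++c) // byte offset of chunk 2
    data_offset += chunking_schedule[c] * row_words * sizeof(uint64_t);
  return data_offset; // (43 + 43) * 1025 * 8 bytes
}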
void free_chunk_host_data(int32_t chunk_id, GPU_DFG *dfg) {
assert(chunks[chunk_id]->location == host_location &&
@@ -397,16 +400,18 @@ struct Dependence {
chunks[chunk_id]->onHostReady = false;
}
void free_chunk_device_data(int32_t chunk_id, GPU_DFG *dfg) {
assert(chunks[chunk_id]->location > host_location &&
chunks[chunk_id]->device_data != nullptr);
if (chunks[chunk_id]->device_data == nullptr)
return;
cuda_drop_async(
chunks[chunk_id]->device_data,
(cudaStream_t *)dfg->get_gpu_stream(chunks[chunk_id]->location),
chunks[chunk_id]->location);
chunks[chunk_id]->device_data = nullptr;
chunks[chunk_id]->location =
(chunks[chunk_id]->onHostReady) ? host_location : invalid_location;
}
inline void free_data(GPU_DFG *dfg, bool immediate = false) {
if (location >= 0 && device_data != nullptr) {
if (device_data != nullptr) {
cuda_drop_async(device_data,
(cudaStream_t *)dfg->get_gpu_stream(location), location);
}
@@ -414,17 +419,19 @@ struct Dependence {
// As streams are not synchronized aside from the GET operation,
// we cannot free host-side data until after the synchronization
// point as it could still be used by an asynchronous operation.
if (immediate)
if (immediate) {
free(host_data.allocated);
else
host_data.allocated = nullptr;
} else {
dfg->register_stream_order_dependent_allocation(host_data.allocated);
}
}
for (auto c : chunks)
c->free_data(dfg, immediate);
chunks.clear();
delete (this);
}
inline void copy(int32_t loc, GPU_DFG *dfg) {
inline void copy(int32_t loc, GPU_DFG *dfg, bool synchronize = true) {
size_t data_size = memref_get_data_size(host_data);
if (loc == location)
return;
@@ -438,7 +445,8 @@ struct Dependence {
cudaStream_t *s = (cudaStream_t *)dfg->get_gpu_stream(location);
cuda_memcpy_async_to_cpu(host_data.aligned, device_data, data_size, s,
location);
cudaStreamSynchronize(*s);
if (synchronize)
cudaStreamSynchronize(*s);
onHostReady = true;
} else {
assert(onHostReady &&
@@ -508,6 +516,7 @@ struct Stream {
bool ct_stream;
bool pt_stream;
size_t generation;
std::atomic<size_t> uses = {0};
const char *name;
Stream(stream_type t, const char *sname = nullptr)
: dep(nullptr), type(t), producer(nullptr), dfg(nullptr),
@@ -524,7 +533,7 @@ struct Stream {
}
~Stream() {
if (dep != nullptr)
dep->free_data(dfg);
dep->free_data(dfg, true);
if (producer != nullptr)
delete producer;
}
@@ -547,6 +556,7 @@ struct Stream {
dep = d;
}
dep->stream_generation = generation;
uses = 0;
}
// For a given dependence, traverse the DFG backwards to extract the lattice
// of kernels required to execute to produce this data
@@ -579,18 +589,22 @@ struct Stream {
size_t subgraph_bootstraps = 0;
for (auto p : queue) {
is_batched_subgraph |= p->batched_process;
subgraph_bootstraps +=
(p->fun == memref_bootstrap_lwe_u64_process) ? 1 : 0;
subgraph_bootstraps += (p->fun == memref_bootstrap_lwe_u64_process ||
p->fun == memref_keyswitch_lwe_u64_process)
? 1
: 0;
}
// If this subgraph is not batched, then use this DFG's allocated
// GPU to offload to. If this does not bootstrap, just execute on
// the host.
if (!is_batched_subgraph) {
for (auto p : queue) {
schedule_kernel(
p, (subgraph_bootstraps > 0) ? dfg->gpu_idx : host_location,
single_chunk, nullptr);
}
int32_t loc = (subgraph_bootstraps > 0) ? dfg->gpu_idx : host_location;
for (auto p : queue)
schedule_kernel(p, loc, single_chunk,
(p == producer) ? out.aligned : nullptr);
if (loc != host_location)
dep->copy(host_location, dfg, true);
dep->onHostReady = true;
return;
}
@@ -645,8 +659,6 @@ struct Stream {
num_real_inputs++;
if (s->dep->host_data.sizes[0] > num_samples)
num_samples = s->dep->host_data.sizes[0];
if (!s->dep->chunks.empty())
num_samples = s->dep->chunks.size();
} else {
mem_per_sample += sizeof(uint64_t);
}
@@ -657,7 +669,7 @@ struct Stream {
(num_real_inputs ? num_real_inputs : 1);
size_t num_chunks = 1;
size_t num_gpu_chunks = 0;
int32_t num_devices_to_use = 0;
size_t gpu_chunk_factor = device_compute_factor;
// If the subgraph does not have sufficient computational
// intensity (which we approximate by whether it bootstraps), then
// we assume (TODO: confirm with profiling) that it is not
@@ -681,16 +693,20 @@ struct Stream {
(available_mem - const_mem_per_sample) /
((mem_per_sample ? mem_per_sample : 1) * gpu_memory_inflation_factor);
if (num_samples < num_cores + device_compute_factor * num_devices) {
num_devices_to_use = 0;
while (gpu_chunk_factor > 4) {
if (num_samples < num_cores + gpu_chunk_factor * num_devices)
gpu_chunk_factor >>= 1;
else
break;
}
if (num_samples < num_cores + gpu_chunk_factor * num_devices) {
num_chunks = std::min(num_cores, num_samples);
} else {
num_devices_to_use = num_devices;
size_t compute_resources =
num_cores + num_devices * device_compute_factor;
size_t compute_resources = num_cores + num_devices * gpu_chunk_factor;
size_t gpu_chunk_size =
std::ceil((double)num_samples / compute_resources) *
device_compute_factor;
gpu_chunk_factor;
size_t scale_factor =
std::ceil((double)gpu_chunk_size / max_samples_per_chunk);
num_chunks = num_cores * scale_factor;
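The factor-halving heuristic above can be read as a standalone loop; the machine parameters below are hypothetical (64 cores, 2 GPUs, a starting factor of 16, 90 samples) and are not taken from the project.

// Equivalent standalone form of the halving loop, illustrative values only.
#include <cstddef>

size_t pick_gpu_chunk_factor(size_t num_samples = 90, size_t num_cores = 64,
                             size_t num_devices = 2) {
  size_t gpu_chunk_factor = 16;
  while (gpu_chunk_factor > 4 &&
         num_samples < num_cores + gpu_chunk_factor * num_devices)
    gpu_chunk_factor >>= 1; // 16 -> 8 because 90 < 96; stops at 8 because 90 >= 80
  return gpu_chunk_factor;
}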
@@ -702,7 +718,8 @@ struct Stream {
for (auto i : inputs)
i->dep->split_dependence(num_chunks, num_gpu_chunks,
(i->ct_stream) ? 0 : 1, i->const_stream);
(i->ct_stream) ? 0 : 1, i->const_stream,
gpu_chunk_factor, dfg);
for (auto iv : intermediate_values) {
if (iv->need_new_gen()) {
iv->put(new Dependence(split_location,
@@ -712,21 +729,67 @@ struct Stream {
}
}
for (auto o : outputs) {
if (o->need_new_gen()) {
o->put(new Dependence(split_location,
{nullptr, nullptr, 0, {0, 0}, {0, 0}}, nullptr,
false, false, split_chunks));
o->dep->chunks.resize(num_chunks + num_gpu_chunks, nullptr);
if (!o->need_new_gen())
continue;
std::function<uint64_t(Stream *)> get_output_size =
[&](Stream *s) -> uint64_t {
uint64_t res = 0;
// If this stream is not produced within SDFG, we could use
// the input size. For now return 0.
if (s->producer == nullptr)
return 0;
// If the producer process has an output size registered,
// return it.
if (s->producer->output_size.val > 0)
return s->producer->output_size.val;
        // Finally, look for sizes among the producer's inputs when no
        // size is registered, since the polynomial size does not change
        // in operators that do not register a size.
for (auto p : s->producer->input_streams) {
uint64_t p_size = get_output_size(p);
if (p_size == 0)
continue;
if (res == 0)
res = get_output_size(p);
else
assert(res == p_size);
}
return res;
};
MemRef2 out_mref;
bool allocated = false;
if (o == this) {
out_mref = out;
} else {
uint64_t output_size = get_output_size(o);
out_mref = {0, 0, 0, {num_samples, output_size}, {output_size, 1}};
size_t data_size = memref_get_data_size(out_mref);
out_mref.allocated = out_mref.aligned = (uint64_t *)malloc(data_size);
allocated = true;
}
o->put(new Dependence(split_location, out_mref, nullptr, false, allocated,
split_chunks));
o->dep->chunks.resize(num_chunks + num_gpu_chunks, nullptr);
}
for (auto o : outputs) {
o->dep->chunking_schedule.clear();
for (auto i : inputs) {
size_t cdim = (i->ct_stream) ? 0 : 1;
if (i->dep->host_data.sizes[cdim] == num_samples) {
for (auto c : i->dep->chunks)
o->dep->chunking_schedule.push_back(c->host_data.sizes[cdim]);
break;
}
}
}
// Execute graph
std::list<std::thread> workers;
std::list<std::thread> gpu_schedulers;
std::vector<std::list<size_t>> gpu_chunk_list;
gpu_chunk_list.resize(num_devices);
int32_t dev = 0;
for (size_t c = 0; c < num_chunks + num_gpu_chunks; ++c) {
for (int c = num_chunks + num_gpu_chunks - 1; c >= 0; --c) {
if (!subgraph_bootstraps) {
workers.push_back(std::thread(
[&](std::list<Process *> queue, size_t c, int32_t host_location) {
@@ -742,7 +805,21 @@ struct Stream {
workers.push_back(std::thread(
[&](std::list<Process *> queue, size_t c, int32_t host_location) {
for (auto p : queue) {
schedule_kernel(p, host_location, c, nullptr);
Stream *os = p->output_streams[0];
auto it = std::find(outputs.begin(), outputs.end(), os);
if (it == outputs.end()) {
schedule_kernel(p, host_location, c, nullptr);
} else {
size_t data_offset = 0;
for (int32_t ch = 0; ch < c; ++ch)
data_offset +=
outputs.front()->dep->chunking_schedule[ch] *
os->dep->host_data.sizes[1] * sizeof(uint64_t);
schedule_kernel(
p, host_location, c,
(uint64_t *)(((char *)os->dep->host_data.aligned) +
data_offset));
}
}
for (auto iv : intermediate_values)
if (iv->consumers.size() == 1)
@@ -765,15 +842,35 @@ struct Stream {
assert(status == cudaSuccess);
cudaMemGetInfo(&gpu_free_mem, &gpu_total_mem);
assert(status == cudaSuccess);
for (auto p : queue)
for (auto p : queue) {
schedule_kernel(p, dev, c, nullptr);
for (auto out_str : p->output_streams) {
                    // For each output stream of this process, if it is
                    // a subgraph output, schedule a copy out of the
                    // data produced by this process.
if (auto it =
std::find(outputs.begin(), outputs.end(), out_str);
it != outputs.end()) {
out_str->dep->merge_output_off_device(c, dfg);
continue;
}
                    // If this is not an output but one of its consumer
                    // processes is not part of this subgraph, we need
                    // to copy the data out.
for (auto cons_proc : out_str->consumers)
if (auto it =
std::find(queue.begin(), queue.end(), cons_proc);
it == queue.end())
out_str->dep->copy_chunk_off_device(c, dfg);
}
}
for (auto i : inputs)
if (++i->uses == i->consumers.size())
i->dep->free_chunk_device_data(c, dfg);
for (auto iv : intermediate_values)
if (iv->consumers.size() > 1)
iv->dep->move_chunk_off_device(c, dfg);
else
iv->dep->free_chunk_device_data(c, dfg);
iv->dep->free_chunk_device_data(c, dfg);
for (auto o : outputs)
o->dep->move_chunk_off_device(c, dfg);
o->dep->free_chunk_device_data(c, dfg);
cudaStreamSynchronize(*(cudaStream_t *)dfg->get_gpu_stream(dev));
}
},
@@ -786,65 +883,43 @@ struct Stream {
gs.join();
gpu_schedulers.clear();
// Build output out of the separate chunks processed
for (auto o : outputs) {
assert(o->batched_stream && o->ct_stream &&
"Only operations with ciphertext output supported.");
o->dep->merge_dependence(dfg);
}
for (auto o : outputs)
o->dep->finalize_merged_dependence(dfg);
for (dev = 0; dev < num_devices; ++dev)
cudaStreamSynchronize(*(cudaStream_t *)dfg->get_gpu_stream(dev));
// We will assume that only one subgraph is being processed per
// DFG at a time, so we can safely free these here.
dfg->free_stream_order_dependent_data();
return;
}
Dependence *get_on_host(MemRef2 &out) {
void get_on_host(MemRef2 &out) {
// Schedule the execution of the SDFG subgraph required to compute
// the value requested
schedule_work(out);
assert(dep != nullptr && "GET on empty stream not allowed.");
// If this was already copied to host, copy out
if (dep->onHostReady) {
memref_copy_contiguous(out, dep->host_data);
return dep;
} else if (dep->location == split_location) {
char *pos = (char *)(out.aligned + out.offset);
std::list<int32_t> devices_used;
for (auto c : dep->chunks) {
size_t data_size = memref_get_data_size(c->host_data);
cuda_memcpy_async_to_cpu(
pos, c->device_data, data_size,
(cudaStream_t *)dfg->get_gpu_stream(c->location), c->location);
pos += data_size;
devices_used.push_back(c->location);
}
// We should only synchronize devices that had data chunks
devices_used.sort();
devices_used.unique();
for (auto i : devices_used)
cudaStreamSynchronize(*(cudaStream_t *)dfg->get_gpu_stream(i));
} else {
size_t data_size = memref_get_data_size(dep->host_data);
cuda_memcpy_async_to_cpu(out.aligned + out.offset, dep->device_data,
data_size, (cudaStream_t *)dfg->gpu_stream,
dep->location);
cudaStreamSynchronize(*(cudaStream_t *)dfg->gpu_stream);
}
// After this synchronization point, all of the host-side
// allocated memory can be freed as we know all asynchronous
// operations have finished.
dfg->free_stream_order_dependent_data();
    // The result should already have been copied to the host, so there
    // is nothing to do here (transfers to the host are synchronized in
    // schedule_work).
assert(dep != nullptr && dep->onHostReady);
// For now we make a copy of this dependence for future use as we
// can't assume that the output location will remain live until
// the next use.
// TODO: eliminate this copy.
if (!dep->hostAllocated)
dep->host_data = memref_copy_alloc(out);
dep->onHostReady = true;
dep->hostAllocated = true;
return dep;
}
Dependence *get(int32_t location, int32_t chunk_id = single_chunk) {
assert(dep != nullptr && "Dependence could not be computed.");
assert(chunk_id != split_chunks);
if (chunk_id != single_chunk) {
Dependence *d = dep->chunks[chunk_id];
d->copy(location, dfg);
d->copy(location, dfg, false);
return d;
}
dep->copy(location, dfg);
dep->copy(location, dfg, false);
return dep;
}
inline bool need_new_gen(int32_t chunk_id = single_chunk) {
@@ -863,6 +938,13 @@ struct Stream {
}
};
void GPU_DFG::free_streams() {
streams.sort();
streams.unique();
for (auto s : streams)
delete s;
}
static inline mlir::concretelang::gpu_dfg::Process *
make_process_1_1(void *dfg, void *sin1, void *sout,
void (*fun)(Process *, int32_t, int32_t, uint64_t *)) {
@@ -882,6 +964,7 @@ make_process_1_1(void *dfg, void *sin1, void *sout,
p->dfg->register_stream(s1);
p->dfg->register_stream(so);
p->batched_process = s1->batched_stream;
p->output_size.val = 0;
return p;
}
@@ -909,6 +992,7 @@ make_process_2_1(void *dfg, void *sin1, void *sin2, void *sout,
p->dfg->register_stream(s2);
p->dfg->register_stream(so);
p->batched_process = s1->batched_stream;
p->output_size.val = 0;
return p;
}
@@ -957,8 +1041,11 @@ void memref_keyswitch_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
uint64_t *out_ptr) {
auto sched = [&](Dependence *d) {
uint64_t num_samples = d->host_data.sizes[0];
MemRef2 out = {
0, 0, 0, {num_samples, p->output_size.val}, {p->output_size.val, 1}};
MemRef2 out = {out_ptr,
out_ptr,
0,
{num_samples, p->output_size.val},
{p->output_size.val, 1}};
size_t data_size = memref_get_data_size(out);
if (loc == host_location) {
// If it is not profitable to offload, schedule kernel on CPU
@@ -972,8 +1059,8 @@ void memref_keyswitch_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
d->host_data.strides[1], p->level.val, p->base_log.val,
p->input_lwe_dim.val, p->output_lwe_dim.val, p->sk_index.val,
p->ctx.val);
Dependence *dep =
new Dependence(loc, out, nullptr, true, true, d->chunk_id);
Dependence *dep = new Dependence(loc, out, nullptr, true,
(out_ptr == nullptr), d->chunk_id);
return dep;
} else {
// Schedule the keyswitch kernel on the GPU
@@ -999,7 +1086,8 @@ void memref_keyswitch_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
void memref_bootstrap_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
uint64_t *out_ptr) {
assert(p->output_size.val == p->glwe_dim.val * p->poly_size.val + 1);
if (!p->output_streams[0]->need_new_gen(chunk_id))
return;
Dependence *idep1 = p->input_streams[1]->get(host_location, chunk_id);
MemRef2 &mtlu = idep1->host_data;
uint32_t num_lut_vectors = mtlu.sizes[0];
@@ -1023,8 +1111,11 @@ void memref_bootstrap_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
std::vector<size_t> &lut_indexes, cudaStream_t *s,
int32_t loc) {
uint64_t num_samples = d0->host_data.sizes[0];
MemRef2 out = {
0, 0, 0, {num_samples, p->output_size.val}, {p->output_size.val, 1}};
MemRef2 out = {out_ptr,
out_ptr,
0,
{num_samples, p->output_size.val},
{p->output_size.val, 1}};
size_t data_size = memref_get_data_size(out);
// Move test vector indexes to the GPU, the test vector indexes is set of 0
@@ -1065,8 +1156,8 @@ void memref_bootstrap_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
d1->host_data.strides[1], p->input_lwe_dim.val, p->poly_size.val,
p->level.val, p->base_log.val, p->glwe_dim.val, p->sk_index.val,
p->ctx.val);
Dependence *dep =
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
Dependence *dep = new Dependence(loc, out, nullptr, true,
(out_ptr == nullptr), d0->chunk_id);
free(glwe_ct);
return dep;
} else {
@@ -1129,8 +1220,8 @@ void memref_add_lwe_ciphertexts_u64_process(Process *p, int32_t loc,
assert(d0->host_data.sizes[1] == d1->host_data.sizes[1]);
assert(d0->chunk_id == d1->chunk_id);
uint64_t num_samples = d0->host_data.sizes[0];
MemRef2 out = {0,
0,
MemRef2 out = {out_ptr,
out_ptr,
0,
{num_samples, d0->host_data.sizes[1]},
{d0->host_data.sizes[1], 1}};
@@ -1148,8 +1239,8 @@ void memref_add_lwe_ciphertexts_u64_process(Process *p, int32_t loc,
d1->host_data.aligned, d1->host_data.offset, d1->host_data.sizes[0],
d1->host_data.sizes[1], d1->host_data.strides[0],
d1->host_data.strides[1]);
Dependence *dep =
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
Dependence *dep = new Dependence(loc, out, nullptr, true,
(out_ptr == nullptr), d0->chunk_id);
return dep;
} else {
// Schedule the kernel on the GPU
@@ -1179,8 +1270,8 @@ void memref_add_plaintext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
d1->host_data.sizes[1] == 1);
assert(d0->chunk_id == d1->chunk_id);
uint64_t num_samples = d0->host_data.sizes[0];
MemRef2 out = {0,
0,
MemRef2 out = {out_ptr,
out_ptr,
0,
{num_samples, d0->host_data.sizes[1]},
{d0->host_data.sizes[1], 1}};
@@ -1206,8 +1297,8 @@ void memref_add_plaintext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
d0->host_data.strides[1], d1->host_data.allocated,
d1->host_data.aligned, d1->host_data.offset, d1->host_data.sizes[1],
d1->host_data.strides[1]);
Dependence *dep =
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
Dependence *dep = new Dependence(loc, out, nullptr, true,
(out_ptr == nullptr), d0->chunk_id);
return dep;
} else {
// Schedule the kernel on the GPU
@@ -1237,8 +1328,8 @@ void memref_mul_cleartext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
d1->host_data.sizes[1] == 1);
assert(d0->chunk_id == d1->chunk_id);
uint64_t num_samples = d0->host_data.sizes[0];
MemRef2 out = {0,
0,
MemRef2 out = {out_ptr,
out_ptr,
0,
{num_samples, d0->host_data.sizes[1]},
{d0->host_data.sizes[1], 1}};
@@ -1264,8 +1355,8 @@ void memref_mul_cleartext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
d0->host_data.strides[1], d1->host_data.allocated,
d1->host_data.aligned, d1->host_data.offset, d1->host_data.sizes[1],
d1->host_data.strides[1]);
Dependence *dep =
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
Dependence *dep = new Dependence(loc, out, nullptr, true,
(out_ptr == nullptr), d0->chunk_id);
return dep;
} else {
// Schedule the keyswitch kernel on the GPU
@@ -1291,8 +1382,8 @@ void memref_negate_lwe_ciphertext_u64_process(Process *p, int32_t loc,
uint64_t *out_ptr) {
auto sched = [&](Dependence *d0, cudaStream_t *s, int32_t loc) {
uint64_t num_samples = d0->host_data.sizes[0];
MemRef2 out = {0,
0,
MemRef2 out = {out_ptr,
out_ptr,
0,
{num_samples, d0->host_data.sizes[1]},
{d0->host_data.sizes[1], 1}};
@@ -1307,8 +1398,8 @@ void memref_negate_lwe_ciphertext_u64_process(Process *p, int32_t loc,
d0->host_data.aligned, d0->host_data.offset, d0->host_data.sizes[0],
d0->host_data.sizes[1], d0->host_data.strides[0],
d0->host_data.strides[1]);
Dependence *dep =
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
Dependence *dep = new Dependence(loc, out, nullptr, true,
(out_ptr == nullptr), d0->chunk_id);
return dep;
} else {
// Schedule the kernel on the GPU
@@ -1544,12 +1635,14 @@ void *stream_emulator_make_memref_stream(const char *name, stream_type stype) {
}
void stream_emulator_put_memref(void *stream, uint64_t *allocated,
uint64_t *aligned, uint64_t offset,
uint64_t size, uint64_t stride) {
uint64_t size, uint64_t stride,
uint64_t data_ownership = 0) {
assert(stride == 1 && "Strided memrefs not supported");
Stream *s = (Stream *)stream;
MemRef2 m = {allocated, aligned, offset, {1, size}, {size, stride}};
Dependence *dep =
new Dependence(host_location, memref_copy_alloc(m), nullptr, true, true);
new Dependence(host_location, (data_ownership) ? m : memref_copy_alloc(m),
nullptr, true, true);
s->put(dep);
s->generation++;
}
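For illustration, two hypothetical call sites of this function; stream, buf and n are made up, and the last argument is the new data_ownership flag.

// Ownership transferred: the runtime keeps buf and frees it itself; the
// caller must not free or reuse it.
stream_emulator_put_memref(stream, buf, buf, /*offset=*/0, /*size=*/n,
                           /*stride=*/1, /*data_ownership=*/1);
// Default (flag omitted, i.e. 0): the runtime copies the data and the
// caller still owns buf.
stream_emulator_put_memref(stream, buf, buf, 0, n, 1);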
@@ -1573,12 +1666,14 @@ void *stream_emulator_make_memref_batch_stream(const char *name,
void stream_emulator_put_memref_batch(void *stream, uint64_t *allocated,
uint64_t *aligned, uint64_t offset,
uint64_t size0, uint64_t size1,
uint64_t stride0, uint64_t stride1) {
uint64_t stride0, uint64_t stride1,
uint64_t data_ownership = 0) {
assert(stride1 == 1 && "Strided memrefs not supported");
Stream *s = (Stream *)stream;
MemRef2 m = {allocated, aligned, offset, {size0, size1}, {stride0, stride1}};
Dependence *dep =
new Dependence(host_location, memref_copy_alloc(m), nullptr, true, true);
new Dependence(host_location, (data_ownership) ? m : memref_copy_alloc(m),
nullptr, true, true);
s->put(dep);
s->generation++;
}
@@ -1587,6 +1682,10 @@ void stream_emulator_get_memref_batch(void *stream, uint64_t *out_allocated,
uint64_t out_offset, uint64_t out_size0,
uint64_t out_size1, uint64_t out_stride0,
uint64_t out_stride1) {
static size_t count = 0;
END_TIME_C_ACC(&blocking_get_timer, "Non-GPU section execution", count,
&acc1);
BEGIN_TIME(&blocking_get_timer);
assert(out_stride1 == 1 && "Strided memrefs not supported");
MemRef2 mref = {out_allocated,
out_aligned,
@@ -1595,9 +1694,13 @@ void stream_emulator_get_memref_batch(void *stream, uint64_t *out_allocated,
{out_stride0, out_stride1}};
auto s = (Stream *)stream;
s->get_on_host(mref);
END_TIME_C_ACC(&blocking_get_timer, "GPU section execution", count++, &acc2);
BEGIN_TIME(&blocking_get_timer);
}
void *stream_emulator_init() {
CONCRETELANG_ENABLE_TIMING();
BEGIN_TIME(&init_timer);
int num;
assert(cudaGetDeviceCount(&num) == cudaSuccess);
num_devices = num;
@@ -1646,9 +1749,42 @@ void *stream_emulator_init() {
if (num_cores < 1)
num_cores = 1;
END_TIME(&init_timer, "Initialization of the SDFG runtime");
BEGIN_TIME(&init_timer);
int device = next_device.fetch_add(1) % num_devices;
return new GPU_DFG(device);
}
void stream_emulator_run(void *dfg) {}
void stream_emulator_run(void *dfg) {
END_TIME(&init_timer, "Building the SDFG graph");
BEGIN_TIME(&blocking_get_timer);
}
void stream_emulator_delete(void *dfg) { delete (GPU_DFG *)dfg; }
#endif
namespace mlir {
namespace concretelang {
namespace gpu_dfg {
bool check_cuda_device_available() {
#ifdef CONCRETELANG_CUDA_SUPPORT
int num;
if (cudaGetDeviceCount(&num) != cudaSuccess)
return false;
return num > 0;
#else
return false;
#endif
}
bool check_cuda_runtime_enabled() {
#ifdef CONCRETELANG_CUDA_SUPPORT
return true;
#else
return false;
#endif
}
} // namespace gpu_dfg
} // namespace concretelang
} // namespace mlir
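A minimal sketch of how a caller can guard GPU code paths with these two helpers; the real use is in the CompilerEngine change further down in this diff.

if (!mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled() ||
    !mlir::concretelang::gpu_dfg::check_cuda_device_available()) {
  // Fall back to CPU-only code generation / execution.
}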

View File

@@ -41,14 +41,17 @@ RuntimeContext::RuntimeContext(ServerKeyset serverKeyset)
}
#ifdef CONCRETELANG_CUDA_SUPPORT
assert(cudaGetDeviceCount(&num_devices) == cudaSuccess);
bsk_gpu.resize(num_devices);
ksk_gpu.resize(num_devices);
for (int i = 0; i < num_devices; ++i) {
bsk_gpu[i].resize(serverKeyset.lweBootstrapKeys.size(), nullptr);
ksk_gpu[i].resize(serverKeyset.lweKeyswitchKeys.size(), nullptr);
bsk_gpu_mutex.push_back(std::make_unique<std::mutex>());
ksk_gpu_mutex.push_back(std::make_unique<std::mutex>());
if (cudaGetDeviceCount(&num_devices) == cudaSuccess) {
bsk_gpu.resize(num_devices);
ksk_gpu.resize(num_devices);
for (int i = 0; i < num_devices; ++i) {
bsk_gpu[i].resize(serverKeyset.lweBootstrapKeys.size(), nullptr);
ksk_gpu[i].resize(serverKeyset.lweKeyswitchKeys.size(), nullptr);
bsk_gpu_mutex.push_back(std::make_unique<std::mutex>());
ksk_gpu_mutex.push_back(std::make_unique<std::mutex>());
}
} else {
num_devices = 0;
}
#endif
}

View File

@@ -0,0 +1,21 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#include "concretelang/Runtime/time_util.h"
#if CONCRETELANG_TIMING_ENABLED
namespace mlir {
namespace concretelang {
namespace time_util {
bool timing_enabled = false;
struct timespec timestamp;
} // namespace time_util
} // namespace concretelang
} // namespace mlir
#endif

View File

@@ -0,0 +1,15 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.
#include "concretelang/Runtime/utils.h"
namespace mlir {
namespace concretelang {
void LLVMInitializeNativeTarget() {
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
}
} // namespace concretelang
} // namespace mlir

View File

@@ -112,7 +112,6 @@ void memref_batched_keyswitch_lwe_cuda_u64(
uint64_t ct0_stride0, uint64_t ct0_stride1, uint32_t level,
uint32_t base_log, uint32_t input_lwe_dim, uint32_t output_lwe_dim,
uint32_t ksk_index, mlir::concretelang::RuntimeContext *context) {
assert(ksk_index == 0 && "multiple ksk is not yet implemented on GPU");
assert(out_size0 == ct0_size0);
assert(out_size1 == output_lwe_dim + 1);
assert(ct0_size1 == input_lwe_dim + 1);
@@ -159,7 +158,6 @@ void memref_batched_bootstrap_lwe_cuda_u64(
uint64_t tlu_stride, uint32_t input_lwe_dim, uint32_t poly_size,
uint32_t level, uint32_t base_log, uint32_t glwe_dim, uint32_t bsk_index,
mlir::concretelang::RuntimeContext *context) {
assert(bsk_index == 0 && "multiple bsk is not yet implemented on GPU");
assert(out_size0 == ct0_size0);
assert(out_size1 == glwe_dim * poly_size + 1);
// TODO: Multi GPU
@@ -247,7 +245,6 @@ void memref_batched_mapped_bootstrap_lwe_cuda_u64(
uint32_t input_lwe_dim, uint32_t poly_size, uint32_t level,
uint32_t base_log, uint32_t glwe_dim, uint32_t bsk_index,
mlir::concretelang::RuntimeContext *context) {
assert(bsk_index == 0 && "multiple bsk is not yet implemented on GPU");
assert(out_size0 == ct0_size0);
assert(out_size1 == glwe_dim * poly_size + 1);
assert((out_size0 == tlu_size0 || tlu_size0 == 1) &&

View File

@@ -16,6 +16,7 @@
#include "concretelang/Common/Protocol.h"
#include "concretelang/Common/Transformers.h"
#include "concretelang/Common/Values.h"
#include "concretelang/Runtime/DFRuntime.hpp"
#include "concretelang/Runtime/context.h"
#include "concretelang/ServerLib/ServerLib.h"
#include "concretelang/Support/CompilerEngine.h"
@@ -355,6 +356,7 @@ DynamicModule::open(const std::string &sharedLibPath) {
if (!module->libraryHandle) {
return StringError("Cannot open shared library ") << dlerror();
}
mlir::concretelang::dfr::_dfr_register_lib(module->libraryHandle);
return module;
}
@@ -430,6 +432,13 @@ bool getGateIsSigned(const Message<concreteprotocol::GateInfo> &gateInfo) {
Result<std::vector<TransportValue>>
ServerCircuit::call(const ServerKeyset &serverKeyset,
std::vector<TransportValue> &args) {
std::vector<TransportValue> returns(returnsBuffer.size());
mlir::concretelang::dfr::_dfr_register_lib(dynamicModule->libraryHandle);
if (!mlir::concretelang::dfr::_dfr_is_root_node()) {
mlir::concretelang::dfr::_dfr_run_remote_scheduler();
return returns;
}
if (args.size() != argsBuffer.size()) {
return StringError("Called circuit with wrong number of arguments");
}
@@ -444,7 +453,6 @@ ServerCircuit::call(const ServerKeyset &serverKeyset,
invoke(serverKeyset);
// We process the return values to turn them into transport values.
std::vector<TransportValue> returns(returnsBuffer.size());
for (size_t i = 0; i < returnsBuffer.size(); i++) {
OUTCOME_TRY(returns[i], returnTransformers[i](returnsBuffer[i]));
}

View File

@@ -63,14 +63,10 @@
#include "concretelang/Support/LLVMEmitFile.h"
#include "concretelang/Support/Pipeline.h"
#include "concretelang/Support/Utils.h"
#include <concretelang/Runtime/GPUDFG.hpp>
namespace mlir {
namespace concretelang {
// TODO: should be removed when bufferization is not related to CAPI lowering
// Control whether we should call a cpu of gpu function when lowering
// to CAPI
static bool EMIT_GPU_OPS;
bool getEmitGPUOption() { return EMIT_GPU_OPS; }
/// Creates a new compilation context that can be shared across
/// compilation engines and results
@@ -297,9 +293,6 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,
mlir::MLIRContext &mlirContext = *this->compilationContext->getMLIRContext();
// enable/disable usage of gpu functions during bufferization
EMIT_GPU_OPS = options.emitGPUOps;
auto dataflowParallelize =
options.autoParallelize || options.dataflowParallelize;
auto loopParallelize = options.autoParallelize || options.loopParallelize;
@@ -307,6 +300,56 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,
if (loopParallelize)
mlir::concretelang::dfr::_dfr_set_use_omp(true);
// Sanity checks for enabling GPU usage: the compiler must have been
// compiled with Cuda support (especially important when building
// python wheels), and at least one device must be available to
// execute on.
if (options.emitGPUOps) {
      // If this compiler was not built with Cuda support, requesting
      // GPU is forbidden - instead of a hard error, issue a warning
      // and disable the GPU option.
if (!mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled()) {
// Allow compilation to complete if only code generation is expected.
if (target != Target::LIBRARY) {
warnx("This instance of the Concrete compiler does not support GPU "
"acceleration."
" Allowing code generation to proceed, but execution will not be "
"possible.");
} else {
warnx("This instance of the Concrete compiler does not support GPU "
"acceleration."
" If you are using Concrete-Python, it means that the module "
"installed is not GPU enabled.\n"
"Continuing without GPU acceleration.");
options.emitGPUOps = false;
options.emitSDFGOps = false;
options.batchTFHEOps = false;
}
} else {
// Ensure that at least one Cuda device is available if GPU option
// is used
if (!mlir::concretelang::gpu_dfg::check_cuda_device_available()) {
warnx("No Cuda device available on this system (either not present or "
"the driver is not online).\n"
"Continuing without GPU acceleration.");
options.emitGPUOps = false;
options.emitSDFGOps = false;
options.batchTFHEOps = false;
}
}
// Finally for now we cannot allow dataflow parallelization at the
// same time as GPU usage. This restriction will be relaxed later.
if (dataflowParallelize) {
warnx("Dataflow parallelization and GPU offloading have both been "
"requested. This is not currently supported. Continuing without "
"dataflow parallelization.");
dataflowParallelize = false;
}
}
  // If dataflow parallelization will proceed, mark the runtime as
  // required so that it gets initialised.
if (dataflowParallelize)
mlir::concretelang::dfr::_dfr_set_required(true);
@@ -594,8 +637,11 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,
}
}
// Restrict direct lowering when already generating GPU code through
// the SDFG dialect.
bool lowerDirectlyToGPUOps = (options.emitGPUOps && !options.emitSDFGOps);
if (mlir::concretelang::pipeline::lowerToCAPI(mlirContext, module, enablePass,
options.emitGPUOps)
lowerDirectlyToGPUOps)
.failed()) {
return StreamStringError("Failed to lower to CAPI");
}

View File

@@ -47,8 +47,10 @@
#include "concretelang/Dialect/FHELinalg/Transforms/Tiling.h"
#include "concretelang/Dialect/RT/Analysis/Autopar.h"
#include "concretelang/Dialect/RT/Transforms/Passes.h"
#include "concretelang/Dialect/SDFG/Transforms/Passes.h"
#include "concretelang/Dialect/TFHE/Analysis/ExtractStatistics.h"
#include "concretelang/Dialect/TFHE/Transforms/Transforms.h"
#include "concretelang/Runtime/utils.h"
#include "concretelang/Support/CompilerEngine.h"
#include "concretelang/Support/Error.h"
#include "concretelang/Support/Pipeline.h"
@@ -361,6 +363,7 @@ mlir::LogicalResult batchTFHE(mlir::MLIRContext &context,
pm, mlir::concretelang::createCollapseParallelLoops(), enablePass);
addPotentiallyNestedPass(
pm, mlir::concretelang::createBatchingPass(maxBatchSize), enablePass);
addPotentiallyNestedPass(pm, mlir::createCanonicalizerPass(), enablePass);
return pm.run(module.getOperation());
}
@@ -569,6 +572,8 @@ mlir::LogicalResult lowerToStd(mlir::MLIRContext &context,
enablePass);
addPotentiallyNestedPass(
pm, mlir::concretelang::createFixupBufferDeallocationPass(), enablePass);
addPotentiallyNestedPass(
pm, mlir::concretelang::createSDFGBufferOwnershipPass(), enablePass);
return pm.run(module);
}
@@ -610,8 +615,7 @@ std::unique_ptr<llvm::Module>
lowerLLVMDialectToLLVMIR(mlir::MLIRContext &context,
llvm::LLVMContext &llvmContext,
mlir::ModuleOp &module) {
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
mlir::concretelang::LLVMInitializeNativeTarget();
mlir::registerLLVMDialectTranslation(*module->getContext());
mlir::registerOpenMPDialectTranslation(*module->getContext());

View File

@@ -23,39 +23,26 @@ public:
matchAndRewrite(mlir::scf::ForOp forOp,
mlir::PatternRewriter &rewriter) const override {
auto attr = forOp->getAttrOfType<mlir::BoolAttr>("parallel");
if (attr == nullptr) {
if (!attr || !attr.getValue()) {
return mlir::failure();
}
assert(forOp.getRegionIterArgs().size() == 0 &&
"unexpecting iter args when loops are bufferized");
if (attr.getValue()) {
rewriter.replaceOpWithNewOp<mlir::scf::ParallelOp>(
forOp, mlir::ValueRange{forOp.getLowerBound()},
mlir::ValueRange{forOp.getUpperBound()}, forOp.getStep(),
std::nullopt,
[&](mlir::OpBuilder &builder, mlir::Location location,
mlir::ValueRange indVar, mlir::ValueRange iterArgs) {
mlir::IRMapping map;
map.map(forOp.getInductionVar(), indVar.front());
for (auto &op : forOp.getRegion().front()) {
auto newOp = builder.clone(op, map);
map.map(op.getResults(), newOp->getResults());
}
});
} else {
rewriter.replaceOpWithNewOp<mlir::scf::ForOp>(
forOp, forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep(),
std::nullopt,
[&](mlir::OpBuilder &builder, mlir::Location location,
mlir::Value indVar, mlir::ValueRange iterArgs) {
mlir::IRMapping map;
map.map(forOp.getInductionVar(), indVar);
for (auto &op : forOp.getRegion().front()) {
auto newOp = builder.clone(op, map);
map.map(op.getResults(), newOp->getResults());
}
});
}
rewriter.replaceOpWithNewOp<mlir::scf::ParallelOp>(
forOp, mlir::ValueRange{forOp.getLowerBound()},
mlir::ValueRange{forOp.getUpperBound()}, forOp.getStep(), std::nullopt,
[&](mlir::OpBuilder &builder, mlir::Location location,
mlir::ValueRange indVar, mlir::ValueRange iterArgs) {
mlir::IRMapping map;
map.map(forOp.getInductionVar(), indVar.front());
for (auto &op : forOp.getRegion().front()) {
auto newOp = builder.clone(op, map);
map.map(op.getResults(), newOp->getResults());
}
});
return mlir::success();
}

View File

@@ -122,8 +122,7 @@ func.func @batch_offset_extract_keyswitch(%arg0: tensor<99x2x3x4x99x99x!TFHE.glw
%c97 = arith.constant 97 : index
%0 = bufferization.alloc_tensor() : tensor<2x3x4x!TFHE.glwe<sk<1,1,750>>>
// CHECK: %[[VDROP1DIMS:.*]] = tensor.collapse_shape [[ARG:.*]] {{\[\[0, 1\], \[2\], \[3, 4, 5\]\]}} : tensor<1x2x3x4x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<2x3x4x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
// CHECK: %[[V0:.*]] = tensor.collapse_shape %[[VDROP1DIMS]] {{\[\[0, 1, 2\]\]}} : tensor<2x3x4x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<24x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
// CHECK: %[[V0:.*]] = tensor.collapse_shape %[[SLICE:.*]] {{\[\[0, 1, 2, 3, 4, 5\]\]}} : tensor<1x2x3x4x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<24x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
// CHECK: %[[V1:.*]] = "TFHE.batched_keyswitch_glwe"(%[[V0]]) {key = #TFHE<ksk{{\[}}[[KSK:.*]]{{\]}}<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>, sk{{\[}}[[SK_OUT]]{{\]}}<1,750>, 3, 4>>} : (tensor<24x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>) -> tensor<24x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
// CHECK: %[[V2:.*]] = tensor.expand_shape %[[V1]] {{\[\[0, 1, 2\]\]}} : tensor<24x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>> into tensor<2x3x4x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
// CHECK: return %[[V2]]
@@ -161,8 +160,7 @@ func.func @batch_offset_shifted_bounds_nonunitstep_extract_keyswitch(%arg0: tens
%0 = bufferization.alloc_tensor() : tensor<2x2x2x!TFHE.glwe<sk<1,1,750>>>
// CHECK: %[[V1:.*]] = tensor.extract_slice %arg0{{\[0, 3, 7, 9, 97, 1\] \[1, 2, 2, 2, 1, 1\] \[1, 2, 1, 7, 1, 1\]}} : tensor<99x20x30x40x99x99x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> to tensor<1x2x2x2x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
// CHECK-NEXT: %[[V2:.*]] = tensor.collapse_shape %[[V1]] {{\[\[0, 1\], \[2\], \[3, 4, 5\]\]}} : tensor<1x2x2x2x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
// CHECK-NEXT: %[[V3:.*]] = tensor.collapse_shape %[[V2]] {{\[\[0, 1, 2\]\]}} : tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<8x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
// CHECK-NEXT: %[[V3:.*]] = tensor.collapse_shape %[[V1]] {{\[\[0, 1, 2, 3, 4, 5\]\]}} : tensor<1x2x2x2x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<8x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
// CHECK-NEXT: %[[V4:.*]] = "TFHE.batched_keyswitch_glwe"(%[[V3]]) {key = #TFHE<ksk{{\[}}[[KSK:.*]]{{\]}}<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>, sk{{\[}}[[SK_OUT]]{{\]}}<1,750>, 3, 4>>} : (tensor<8x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>) -> tensor<8x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
// CHECK-NEXT: %[[V5:.*]] = tensor.expand_shape %[[V4]] {{\[\[0, 1, 2\]\]}} : tensor<8x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>> into tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
// CHECK-NEXT: return %[[V5]] : tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>

View File

@@ -0,0 +1,18 @@
// RUN: concretecompiler --split-input-file --action=dump-std --parallelize --parallelize-loops --skip-program-info --passes=for-loop-to-parallel %s 2>&1| FileCheck %s
func.func @bar() -> () {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
%i0 = arith.constant 0 : i32
%i1 = arith.constant 1 : i32
// CHECK-NOT: scf.parallel
%0 = scf.for %iv = %c0 to %c4 step %c1 iter_args(%ia = %i0) -> i32 {
"Tracing.trace_plaintext"(%i0) : (i32) -> ()
%yld = arith.addi %ia, %i1 : i32
scf.yield %yld : i32
} {"parallel" = false }
return
}

View File

@@ -54,15 +54,13 @@ static void BM_ExportArguments(benchmark::State &state,
inputArguments.reserve(test.inputs.size());
auto client = tc.getClientCircuit().value();
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
for (auto _ : state) {
for (size_t i = 0; i < test.inputs.size(); i++) {
auto input = client.prepareInput(test.inputs[i].getValue(), i).value();
inputArguments.push_back(input);
}
for (auto _ : state) {
for (size_t i = 0; i < test.inputs.size(); i++) {
auto input = client.prepareInput(test.inputs[i].getValue(), i).value();
inputArguments.push_back(input);
}
inputArguments.resize(0);
}
inputArguments.resize(0);
}
/// Benchmark time of the program evaluation
@@ -78,12 +76,10 @@ static void BM_Evaluate(benchmark::State &state, EndToEndDesc description,
auto inputArguments = std::vector<TransportValue>();
inputArguments.reserve(test.inputs.size());
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
for (size_t i = 0; i < test.inputs.size(); i++) {
auto input =
clientCircuit.prepareInput(test.inputs[i].getValue(), i).value();
inputArguments.push_back(input);
}
for (size_t i = 0; i < test.inputs.size(); i++) {
auto input =
clientCircuit.prepareInput(test.inputs[i].getValue(), i).value();
inputArguments.push_back(input);
}
auto serverCircuit = tc.getServerCircuit().value();

View File

@@ -26,14 +26,14 @@ program: |
p-error: 1e-06
tests:
- inputs:
- tensor: [-2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -2, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -2, -2, -2]
- tensor: [-1, -1, -2, -2, -1, -2, -2, -1, -2, -1, -2, -1, -1, -2, -2, -2, -2, -1, -2, -1, -1, -2, -1, -1]
shape: [2,3,4]
signed: True
- tensor: [-2, -1, -2, -1, -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -2, -1]
- tensor: [-1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -2, -1, -2, -1, -2]
shape: [2,4,2]
signed: True
outputs:
- tensor: [11, 8, 9, 6, 8, 6, 9, 7, 6, 5, 12, 10]
- tensor: [8, 8, 8, 8, 8, 8, 9, 13, 7, 10, 7, 9]
shape: [2,3,2]
signed: True
---
@@ -63,14 +63,14 @@ program: |
p-error: 1e-06
tests:
- inputs:
- tensor: [-1, -2, -2, -1, -2, -1, -2, -2, -1, -1, -1, -1]
- tensor: [-2, -1, -1, -1, -1, -2, -1, -1, -1, -1, -2, -2]
shape: [3,4]
signed: True
- tensor: [-2, -2, -2, -1, -1, -2, -1, -1]
- tensor: [-2, -2, -1, -1, -1, -2, -2, -2]
shape: [4,2]
signed: True
outputs:
- tensor: [9, 9, 10, 11, 6, 6]
- tensor: [8, 9, 7, 8, 9, 11]
shape: [3,2]
signed: True
---
@@ -100,14 +100,14 @@ program: |
p-error: 1e-06
tests:
- inputs:
- tensor: [-1, -2, -1]
- tensor: [-1, -2, -2]
shape: [3]
signed: True
- tensor: [-2, -1, -2, -2, -2, -2, -2, -1, -1, -1, -2, -2, -2, -2, -2, -1, -2, -2, -1, -1, -1, -2, -2, -1]
- tensor: [-2, -2, -1, -2, -1, -1, -2, -1, -2, -2, -1, -2, -2, -2, -2, -1, -1, -2, -1, -1, -1, -1, -2, -1]
shape: [4,3,2]
signed: True
outputs:
- tensor: [8, 7, 6, 5, 8, 6, 5, 6]
- tensor: [6, 8, 8, 9, 8, 8, 7, 5]
shape: [4,2]
signed: True
---
@@ -137,14 +137,14 @@ program: |
p-error: 1e-06
tests:
- inputs:
- tensor: [-1, -2, -1, -2, -2, -2, -2, -1, -2, -2, -2, -2, -1, -2, -1, -1, -2, -2, -1, -1, -2, -2, -2, -2]
- tensor: [-2, -1, -1, -2, -2, -1, -1, -2, -1, -2, -2, -2, -1, -2, -2, -2, -2, -1, -1, -2, -2, -2, -1, -1]
shape: [2,3,4]
signed: True
- tensor: [-1, -2, -1, -1]
- tensor: [-2, -2, -2, -2]
shape: [4]
signed: True
outputs:
- tensor: [8, 9, 10, 7, 8, 10]
- tensor: [12, 12, 14, 14, 12, 12]
shape: [2,3]
signed: True
---
@@ -174,14 +174,14 @@ program: |
p-error: 1e-06
tests:
- inputs:
- tensor: [-2, -1, -2, -1, -1, -1, -2, -2, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -1, -2, -2, -1, -2, -2]
- tensor: [-2, -2, -2, -1, -1, -1, -2, -1, -1, -2, -2, -2, -1, -1, -2, -2, -2, -1, -1, -1, -1, -2, -1, -2]
shape: [2,1,3,4]
signed: True
- tensor: [-2, -1, -2, -2, -2, -1, -1, -1, -2, -2, -1, -1, -2, -2, -2, -1, -2, -1, -2, -2, -2, -1, -2, -2, -1, -2, -2, -2, -2, -2, -1, -1, -1, -2, -2, -1, -2, -1, -1, -1]
- tensor: [-2, -1, -2, -1, -2, -2, -1, -2, -1, -2, -2, -2, -2, -2, -1, -2, -1, -1, -1, -2, -2, -1, -2, -2, -2, -2, -2, -1, -2, -1, -1, -2, -1, -2, -1, -1, -1, -1, -2, -1]
shape: [5,4,2]
signed: True
outputs:
- tensor: [11, 7, 10, 7, 13, 9, 11, 10, 11, 9, 12, 11, 12, 8, 12, 9, 14, 10, 9, 11, 9, 10, 11, 13, 9, 8, 9, 7, 11, 9, 11, 7, 8, 6, 12, 8, 11, 10, 9, 7, 13, 11, 12, 8, 10, 8, 14, 10, 9, 11, 7, 8, 10, 12, 9, 8, 7, 6, 10, 9]
- tensor: [13, 10, 9, 8, 12, 11, 11, 14, 8, 10, 11, 14, 10, 10, 8, 7, 11, 11, 13, 10, 9, 7, 12, 10, 8, 9, 6, 6, 9, 8, 10, 10, 9, 7, 10, 9, 9, 12, 7, 10, 9, 12, 10, 9, 7, 7, 9, 10, 10, 9, 9, 8, 10, 9, 8, 7, 6, 7, 8, 7]
shape: [2,5,3,2]
signed: True
---
@@ -212,10 +212,10 @@ program: |
p-error: 1e-06
tests:
- inputs:
- tensor: [-1, -2, -2]
- tensor: [-2, -2, -1]
shape: [3]
signed: True
- tensor: [-1, -1, -2]
- tensor: [-2, -1, -1]
shape: [3]
signed: True
outputs:

View File

@@ -28,9 +28,3 @@ add_concretecompiler_unittest(end_to_end_jit_test end_to_end_jit_test.cc globals
add_concretecompiler_unittest(end_to_end_test end_to_end_test.cc globals.cc)
add_concretecompiler_unittest(end_to_end_jit_lambda end_to_end_jit_lambda.cc globals.cc)
if(CONCRETELANG_DATAFLOW_EXECUTION_ENABLED)
add_concretecompiler_unittest(end_to_end_jit_auto_parallelization end_to_end_jit_auto_parallelization.cc globals.cc)
add_concretecompiler_unittest(end_to_end_jit_distributed end_to_end_jit_distributed.cc globals.cc)
add_concretecompiler_unittest(end_to_end_jit_aes_short end_to_end_jit_aes_short.cc globals.cc)
endif()

View File

@@ -1,167 +0,0 @@
#include <concretelang/Runtime/DFRuntime.hpp>
#include <cstdint>
#include <gtest/gtest.h>
#include <tuple>
#include <type_traits>
#include "concretelang/TestLib/TestProgram.h"
#include "end_to_end_jit_test.h"
#include "tests_tools/GtestEnvironment.h"
///////////////////////////////////////////////////////////////////////////////
// Auto-parallelize independent FHE ops /////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
TEST(ParallelizeAndRunFHE, add_eint_tree) {
checkedJit(testCircuit, R"XXX(
func.func @main(%arg0: !FHE.eint<7>, %arg1: !FHE.eint<7>, %arg2: !FHE.eint<7>, %arg3: !FHE.eint<7>) -> !FHE.eint<7> {
%1 = "FHE.add_eint"(%arg0, %arg1): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%2 = "FHE.add_eint"(%arg0, %arg2): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%3 = "FHE.add_eint"(%arg0, %arg3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%4 = "FHE.add_eint"(%arg1, %arg2): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%5 = "FHE.add_eint"(%arg1, %arg3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%6 = "FHE.add_eint"(%arg2, %arg3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%7 = "FHE.add_eint"(%1, %2): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%8 = "FHE.add_eint"(%1, %3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%9 = "FHE.add_eint"(%1, %4): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%10 = "FHE.add_eint"(%1, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%11 = "FHE.add_eint"(%1, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%12 = "FHE.add_eint"(%2, %3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%13 = "FHE.add_eint"(%2, %4): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%14 = "FHE.add_eint"(%2, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%15 = "FHE.add_eint"(%2, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%16 = "FHE.add_eint"(%3, %4): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%17 = "FHE.add_eint"(%3, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%18 = "FHE.add_eint"(%3, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%19 = "FHE.add_eint"(%4, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%20 = "FHE.add_eint"(%4, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%21 = "FHE.add_eint"(%5, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%22 = "FHE.add_eint"(%7, %8): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%23 = "FHE.add_eint"(%9, %10): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%24 = "FHE.add_eint"(%11, %12): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%25 = "FHE.add_eint"(%13, %14): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%26 = "FHE.add_eint"(%15, %16): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%27 = "FHE.add_eint"(%17, %18): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%28 = "FHE.add_eint"(%19, %20): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%29 = "FHE.add_eint"(%22, %23): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%30 = "FHE.add_eint"(%24, %25): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%31 = "FHE.add_eint"(%26, %27): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%32 = "FHE.add_eint"(%21, %28): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%33 = "FHE.add_eint"(%29, %30): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%34 = "FHE.add_eint"(%31, %32): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
%35 = "FHE.add_eint"(%33, %34): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
return %35: !FHE.eint<7>
}
)XXX",
"main", false, true, false, false, 1e-40);
auto lambda = [&](std::vector<concretelang::values::Value> args) {
return testCircuit.call(args)
.value()[0]
.template getTensor<uint64_t>()
.value()[0];
};
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
ASSERT_EQ(lambda({Tensor<uint64_t>(1), Tensor<uint64_t>(2),
Tensor<uint64_t>(3), Tensor<uint64_t>(4)}),
(uint64_t)150);
ASSERT_EQ(lambda({Tensor<uint64_t>(4), Tensor<uint64_t>(5),
Tensor<uint64_t>(6), Tensor<uint64_t>(7)}),
(uint64_t)74);
ASSERT_EQ(lambda({Tensor<uint64_t>(1), Tensor<uint64_t>(1),
Tensor<uint64_t>(1), Tensor<uint64_t>(1)}),
(uint64_t)60);
ASSERT_EQ(lambda({Tensor<uint64_t>(5), Tensor<uint64_t>(7),
Tensor<uint64_t>(11), Tensor<uint64_t>(13)}),
(uint64_t)28);
} else {
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
}
}
std::vector<uint64_t> parallel_results;
TEST(ParallelizeAndRunFHE, nn_small_parallel) {
checkedJit(lambda, R"XXX(
func.func @main(%arg0: tensor<4x5x!FHE.eint<5>>) -> tensor<4x7x!FHE.eint<5>> {
%cst = arith.constant dense<[[0, 0, 1, 0, 1, 1, 0], [1, 1, 1, 0, 1, 0, 0], [1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1]]> : tensor<4x7xi6>
%cst_0 = arith.constant dense<[[1, 0, 1, 1, 0, 1, 1], [0, 1, 0, 0, 0, 0, 1], [0, 1, 1, 1, 1, 0, 0], [0, 1, 1, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 1]]> : tensor<5x7xi6>
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<4x5x!FHE.eint<5>>, tensor<5x7xi6>) -> tensor<4x7x!FHE.eint<5>>
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<4x7x!FHE.eint<5>>, tensor<4x7xi6>) -> tensor<4x7x!FHE.eint<5>>
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52, 56, 59, 63, 66, 70, 73, 77, 80, 84, 87, 91, 94, 98, 101, 105, 108]> : tensor<32xi64>
%2 = "FHELinalg.apply_lookup_table"(%1, %cst_1) : (tensor<4x7x!FHE.eint<5>>, tensor<32xi64>) -> tensor<4x7x!FHE.eint<5>>
return %2 : tensor<4x7x!FHE.eint<5>>
}
)XXX",
"main", false, true, true);
const size_t dim0 = 4;
const size_t dim1 = 5;
const size_t dim2 = 7;
const std::vector<size_t> inputShape({dim0, dim1});
const std::vector<size_t> outputShape({dim0, dim2});
std::vector<uint64_t> values;
values.reserve(dim0 * dim1);
for (size_t i = 0; i < dim0 * dim1; ++i) {
values.push_back(i % 17 % 4);
}
auto input = Tensor<uint64_t>(values, inputShape);
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
auto maybeResult = lambda.call({input});
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
auto result = maybeResult.value()[0].template getTensor<uint64_t>().value();
ASSERT_EQ(result.dimensions, outputShape);
parallel_results = result.values;
} else {
ASSERT_OUTCOME_HAS_FAILURE(lambda.call({}));
}
}
TEST(ParallelizeAndRunFHE, nn_small_sequential) {
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
checkedJit(lambda, R"XXX(
func.func @main(%arg0: tensor<4x5x!FHE.eint<5>>) -> tensor<4x7x!FHE.eint<5>> {
%cst = arith.constant dense<[[0, 0, 1, 0, 1, 1, 0], [1, 1, 1, 0, 1, 0, 0], [1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1]]> : tensor<4x7xi6>
%cst_0 = arith.constant dense<[[1, 0, 1, 1, 0, 1, 1], [0, 1, 0, 0, 0, 0, 1], [0, 1, 1, 1, 1, 0, 0], [0, 1, 1, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 1]]> : tensor<5x7xi6>
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<4x5x!FHE.eint<5>>, tensor<5x7xi6>) -> tensor<4x7x!FHE.eint<5>>
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<4x7x!FHE.eint<5>>, tensor<4x7xi6>) -> tensor<4x7x!FHE.eint<5>>
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52, 56, 59, 63, 66, 70, 73, 77, 80, 84, 87, 91, 94, 98, 101, 105, 108]> : tensor<32xi64>
%2 = "FHELinalg.apply_lookup_table"(%1, %cst_1) : (tensor<4x7x!FHE.eint<5>>, tensor<32xi64>) -> tensor<4x7x!FHE.eint<5>>
return %2 : tensor<4x7x!FHE.eint<5>>
}
)XXX",
"main", false, false, false);
const size_t dim0 = 4;
const size_t dim1 = 5;
const size_t dim2 = 7;
const std::vector<size_t> inputShape({dim0, dim1});
const std::vector<size_t> outputShape({dim0, dim2});
std::vector<uint64_t> values;
values.reserve(dim0 * dim1);
for (size_t i = 0; i < dim0 * dim1; ++i) {
values.push_back(i % 17 % 4);
}
auto input = Tensor<uint64_t>(values, inputShape);
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
auto maybeResult = lambda.call({input});
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
auto result =
maybeResult.value()[0].template getTensor<uint64_t>().value();
for (size_t i = 0; i < dim0 * dim2; i++)
EXPECT_EQ(parallel_results[i], result.values[i])
<< "result differ at pos " << i;
}
}
}

View File

@@ -1,148 +0,0 @@
#include <concretelang/Runtime/DFRuntime.hpp>
#include <cstdint>
#include <gtest/gtest.h>
#include <tuple>
#include <type_traits>
#include "concretelang/TestLib/TestProgram.h"
#include "end_to_end_jit_test.h"
#include "tests_tools/GtestEnvironment.h"
///////////////////////////////////////////////////////////////////////////////
// Auto-parallelize independent FHE ops /////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
std::vector<uint64_t> distributed_results;
TEST(Distributed, nn_med_nested) {
checkedJit(lambda, R"XXX(
func.func @main(%arg0: tensor<200x4x!FHE.eint<4>>) -> tensor<200x8x!FHE.eint<4>> {
%cst = arith.constant dense<"0x01010100010100000001010101000101010101010101010001000101000001010001010100000101000001000001010001000001010100010001000000010100010001010001000001000101010101000100010001000000000100010001000101000001000101010100010001000000000101000100000000000001000100000100000100000001010000010001000101000100010001000100000100000100010101010000000000000000010001010000000100000100010100000100000000010001000101000100000000000101010101000101010101010100010100010100000000000101010100000100010100000001000101000000010101000101000100000101010100010101010000010101010100010000000000000001010101000100010101000001010001010000010001010101000000000000000001000001000000010100000100000101010100010001000000000000010100010101000000010100000100010001010001000000000100010001000101010100010100000001010100010101010100010100010001000001000000000101000101010001000100000101010100000101010100000100010101000100000101000101010100010001000101010100010001010001010000010000010001010000000001000101010001000000000101000000010000010100010001000001000001010101000100010001010100000101000000010001000000000101000101000000010000000001000101010100010001000000000001010000010001000001010101000101010101010100000000000001000100000100000001000000010101010101000000000101010101000100000101000100000000000001000100000101000101010100010000000101000000000100000100000101010000010100000000010000000000010001000100000101010001010101000000000000010000010101010001000000010001010001010000000000000101000000010101010101000001010101000001000001010100000000010001010100000100000101000101010100010001010001000001000100000101000100010100000100010000000101000000010000010001010101010000000101000000010101000001010100000100010001000000000001010000000100010000000000000000000000000001010101010101010101000001010101000001010100000001000101010101010000010101000101010100010101010000010101010100000100000000000101010000000000010101010000000001000000010100000100000001000101010000000001000001000001010001010000010001000101010001010001010101000100010000000100000100010101000000000101010101010001000100000000000101010000010101000001010001010000000001010100000101000001010000000001010101000100010000010101000000000001000101000001010101000101000001000001000000010100010001000101010100010001010000000101000000010001000001000100000101010001000001000001000101010000010001000001000101000000000000000101010000010000000101010100010100010001010101010000000000010001000101010000000001010100000000010001010100010001000001000101000000010100010000010000010001010100010000010001010100010000010100010101010001000100010100010101000100000101010100000100010100000100000000010101000000010001000001010000000101000100000100010101000000010100000101000001010001010100010000000101010000000001010001000000010100010101010001000100010001000001010101000000010001000100000100010101000000000000010100010000000100000000010100010000000100000101010000010101000100010000010100000001000100000000000100000001010101010101000100010001000000010101010100000001000001000001010001000101010100000001010001010100010101000101000000010001010100010101000100000101000101000001000001000001000101010100010001010000000100000101010100000001000000000000010101000100010001000001000001000000000000010100000100000001"> : tensor<200x8xi5>
%cst_0 = arith.constant dense<[[1, 0, 0, 0, 1, 0, 0, 1], [0, 0, 1, 1, 0, 0, 0, 0], [1, 1, 0, 1, 1, 0, 1, 1], [1, 1, 0, 0, 1, 0, 1, 1]]> : tensor<4x8xi5>
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52]> : tensor<16xi64>
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<200x4x!FHE.eint<4>>, tensor<4x8xi5>) -> tensor<200x8x!FHE.eint<4>>
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<200x8x!FHE.eint<4>>, tensor<200x8xi5>) -> tensor<200x8x!FHE.eint<4>>
%res = "FHE.zero_tensor"() : () -> tensor<200x8x!FHE.eint<4>>
%slice_A = tensor.extract_slice %1[0, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_B = tensor.extract_slice %1[25, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_C = tensor.extract_slice %1[50, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_D = tensor.extract_slice %1[75, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_E = tensor.extract_slice %1[100, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_F = tensor.extract_slice %1[125, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_G = tensor.extract_slice %1[150, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_H = tensor.extract_slice %1[175, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_I = tensor.extract_slice %1[0, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_J = tensor.extract_slice %1[25, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_K = tensor.extract_slice %1[50, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_L = tensor.extract_slice %1[75, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_M = tensor.extract_slice %1[100, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_N = tensor.extract_slice %1[125, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_O = tensor.extract_slice %1[150, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_P = tensor.extract_slice %1[175, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%part_A = "FHELinalg.apply_lookup_table"(%slice_A, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_B = "FHELinalg.apply_lookup_table"(%slice_B, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_C = "FHELinalg.apply_lookup_table"(%slice_C, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_D = "FHELinalg.apply_lookup_table"(%slice_D, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_E = "FHELinalg.apply_lookup_table"(%slice_E, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_F = "FHELinalg.apply_lookup_table"(%slice_F, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_G = "FHELinalg.apply_lookup_table"(%slice_G, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_H = "FHELinalg.apply_lookup_table"(%slice_H, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_I = "FHELinalg.apply_lookup_table"(%slice_I, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_J = "FHELinalg.apply_lookup_table"(%slice_J, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_K = "FHELinalg.apply_lookup_table"(%slice_K, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_L = "FHELinalg.apply_lookup_table"(%slice_L, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_M = "FHELinalg.apply_lookup_table"(%slice_M, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_N = "FHELinalg.apply_lookup_table"(%slice_N, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_O = "FHELinalg.apply_lookup_table"(%slice_O, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_P = "FHELinalg.apply_lookup_table"(%slice_P, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%res_A = tensor.insert_slice %part_A into %res [0, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_B = tensor.insert_slice %part_B into %res_A[25, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_C = tensor.insert_slice %part_C into %res_B[50, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_D = tensor.insert_slice %part_D into %res_C[75, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_E = tensor.insert_slice %part_E into %res_D[100, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_F = tensor.insert_slice %part_F into %res_E[125, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_G = tensor.insert_slice %part_G into %res_F[150, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_H = tensor.insert_slice %part_H into %res_G[175, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_I = tensor.insert_slice %part_I into %res_H[0, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_J = tensor.insert_slice %part_J into %res_I[25, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_K = tensor.insert_slice %part_K into %res_J[50, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_L = tensor.insert_slice %part_L into %res_K[75, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_M = tensor.insert_slice %part_M into %res_L[100, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_N = tensor.insert_slice %part_N into %res_M[125, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_O = tensor.insert_slice %part_O into %res_N[150, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_P = tensor.insert_slice %part_P into %res_O[175, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
return %res_P : tensor<200x8x!FHE.eint<4>>
}
)XXX",
"main", false, true, true, DEFAULT_batchTFHEOps,
DEFAULT_global_p_error, DEFAULT_chunkedIntegers, DEFAULT_chunkSize,
DEFAULT_chunkWidth, false);
const size_t dim0 = 200;
const size_t dim1 = 4;
const size_t dim2 = 8;
const std::vector<size_t> inputShape({dim0, dim1});
const std::vector<size_t> outputShape({dim0, dim2});
std::vector<uint64_t> values;
values.reserve(dim0 * dim1);
for (size_t i = 0; i < dim0 * dim1; ++i) {
values.push_back(i % 17 % 4);
}
auto input = Tensor<uint64_t>(values, inputShape);
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
auto maybeResult = lambda.call({input});
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
auto result = maybeResult.value()[0].template getTensor<uint64_t>().value();
ASSERT_EQ(result.dimensions, outputShape);
distributed_results = result.values;
} else {
ASSERT_OUTCOME_HAS_VALUE(lambda.call({}));
}
}
TEST(Distributed, nn_med_sequential) {
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
checkedJit(lambda, R"XXX(
func.func @main(%arg0: tensor<200x4x!FHE.eint<4>>) -> tensor<200x8x!FHE.eint<4>> {
%cst = arith.constant dense<"0x01010100010100000001010101000101010101010101010001000101000001010001010100000101000001000001010001000001010100010001000000010100010001010001000001000101010101000100010001000000000100010001000101000001000101010100010001000000000101000100000000000001000100000100000100000001010000010001000101000100010001000100000100000100010101010000000000000000010001010000000100000100010100000100000000010001000101000100000000000101010101000101010101010100010100010100000000000101010100000100010100000001000101000000010101000101000100000101010100010101010000010101010100010000000000000001010101000100010101000001010001010000010001010101000000000000000001000001000000010100000100000101010100010001000000000000010100010101000000010100000100010001010001000000000100010001000101010100010100000001010100010101010100010100010001000001000000000101000101010001000100000101010100000101010100000100010101000100000101000101010100010001000101010100010001010001010000010000010001010000000001000101010001000000000101000000010000010100010001000001000001010101000100010001010100000101000000010001000000000101000101000000010000000001000101010100010001000000000001010000010001000001010101000101010101010100000000000001000100000100000001000000010101010101000000000101010101000100000101000100000000000001000100000101000101010100010000000101000000000100000100000101010000010100000000010000000000010001000100000101010001010101000000000000010000010101010001000000010001010001010000000000000101000000010101010101000001010101000001000001010100000000010001010100000100000101000101010100010001010001000001000100000101000100010100000100010000000101000000010000010001010101010000000101000000010101000001010100000100010001000000000001010000000100010000000000000000000000000001010101010101010101000001010101000001010100000001000101010101010000010101000101010100010101010000010101010100000100000000000101010000000000010101010000000001000000010100000100000001000101010000000001000001000001010001010000010001000101010001010001010101000100010000000100000100010101000000000101010101010001000100000000000101010000010101000001010001010000000001010100000101000001010000000001010101000100010000010101000000000001000101000001010101000101000001000001000000010100010001000101010100010001010000000101000000010001000001000100000101010001000001000001000101010000010001000001000101000000000000000101010000010000000101010100010100010001010101010000000000010001000101010000000001010100000000010001010100010001000001000101000000010100010000010000010001010100010000010001010100010000010100010101010001000100010100010101000100000101010100000100010100000100000000010101000000010001000001010000000101000100000100010101000000010100000101000001010001010100010000000101010000000001010001000000010100010101010001000100010001000001010101000000010001000100000100010101000000000000010100010000000100000000010100010000000100000101010000010101000100010000010100000001000100000000000100000001010101010101000100010001000000010101010100000001000001000001010001000101010100000001010001010100010101000101000000010001010100010101000100000101000101000001000001000001000101010100010001010000000100000101010100000001000000000000010101000100010001000001000001000000000000010100000100000001"> : tensor<200x8xi5>
%cst_0 = arith.constant dense<[[1, 0, 0, 0, 1, 0, 0, 1], [0, 0, 1, 1, 0, 0, 0, 0], [1, 1, 0, 1, 1, 0, 1, 1], [1, 1, 0, 0, 1, 0, 1, 1]]> : tensor<4x8xi5>
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<200x4x!FHE.eint<4>>, tensor<4x8xi5>) -> tensor<200x8x!FHE.eint<4>>
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<200x8x!FHE.eint<4>>, tensor<200x8xi5>) -> tensor<200x8x!FHE.eint<4>>
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52]> : tensor<16xi64>
%2 = "FHELinalg.apply_lookup_table"(%1, %cst_1) : (tensor<200x8x!FHE.eint<4>>, tensor<16xi64>) -> tensor<200x8x!FHE.eint<4>>
return %2 : tensor<200x8x!FHE.eint<4>>
}
)XXX",
"main", false, false, false, DEFAULT_batchTFHEOps,
DEFAULT_global_p_error, DEFAULT_chunkedIntegers,
DEFAULT_chunkSize, DEFAULT_chunkWidth, false);
const size_t dim0 = 200;
const size_t dim1 = 4;
const size_t dim2 = 8;
const std::vector<size_t> inputShape({dim0, dim1});
const std::vector<size_t> outputShape({dim0, dim2});
std::vector<uint64_t> values;
values.reserve(dim0 * dim1);
for (size_t i = 0; i < dim0 * dim1; ++i) {
values.push_back(i % 17 % 4);
}
auto input = Tensor<uint64_t>(values, inputShape);
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
auto maybeResult = lambda.call({input});
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
auto result =
maybeResult.value()[0].template getTensor<uint64_t>().value();
for (size_t i = 0; i < dim0 * dim2; i++)
EXPECT_EQ(distributed_results[i], result.values[i])
<< "result differ at pos " << i;
}
}
}

View File

@@ -1,23 +0,0 @@
#!/bin/bash
#SBATCH --job-name=end_to_end_jit_distributed
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=antoniu.pop@zama.ai
#SBATCH --nodes=4
#SBATCH --cpus-per-task=8
#SBATCH --time=00:45:00
#SBATCH --output=end_to_end_jit_distributed_%j.log
echo "Date = $(date)"
echo "Hostname = $(hostname -s)"
echo "Working Directory = $(pwd)"
echo ""
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
echo "Number of Tasks Allocated = $SLURM_NTASKS"
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"
export OMP_NUM_THREADS=8
export DFR_NUM_THREADS=2
srun ./build/bin/end_to_end_jit_distributed
date

View File

@@ -407,8 +407,10 @@ func.func @main(%arg0: !FHE.eint<3>) -> !FHE.eint<3> {
}
)XXX");
ASSERT_OUTCOME_HAS_FAILURE_WITH_ERRORMSG(
err, "Program can not be composed: Dag is not composable, because of "
"output 1: Partition 0 has input coefficient 4");
err, "Program can not be composed: At -:4:8: please add "
"`fhe.refresh(...)` to guarantee the function composability.\n"
"The noise of the node 0 is contaminated by noise coming straight "
"from the input (partition: 0, coeff: 4.00).");
}
TEST(CompileNotComposable, not_composable_2) {
@@ -428,8 +430,10 @@ func.func @main(%arg0: !FHE.eint<3>) -> (!FHE.eint<3>, !FHE.eint<3>) {
}
)XXX");
ASSERT_OUTCOME_HAS_FAILURE_WITH_ERRORMSG(
err, "Program can not be composed: Dag is not composable, because of "
"output 1: Partition 0 has input coefficient 4");
err, "Program can not be composed: At -:5:8: please add "
"`fhe.refresh(...)` to guarantee the function composability.\n"
"The noise of the node 0 is contaminated by noise coming straight "
"from the input (partition: 0, coeff: 4.00).");
}
TEST(CompileComposable, composable_supported_v0) {

View File

@@ -6,7 +6,6 @@
#include <type_traits>
#include "concretelang/Common/Values.h"
#include "concretelang/Runtime/DFRuntime.hpp"
#include "concretelang/Support/CompilationFeedback.h"
#include "concretelang/TestLib/TestProgram.h"
#include "end_to_end_fixture/EndToEndFixture.h"
@@ -59,18 +58,10 @@ public:
void testOnce() {
for (auto tests_rep = 0; tests_rep <= options.numberOfRetry; tests_rep++) {
// We execute the circuit.
auto maybeRes =
testCircuit->call((mlir::concretelang::dfr::_dfr_is_root_node())
? args
: std::vector<Value>());
if (!mlir::concretelang::dfr::_dfr_is_root_node())
return;
auto maybeRes = testCircuit->call(args);
ASSERT_OUTCOME_HAS_VALUE(maybeRes);
auto result = maybeRes.value();
if (!mlir::concretelang::dfr::_dfr_is_root_node())
return;
/* Check results */
bool allgood = true;
for (size_t i = 0; i < desc.outputs.size(); i++) {

View File

@@ -36,16 +36,16 @@ Result<TestProgram> setupTestProgram(std::string source,
auto outputs = circuitEncoding.asBuilder().initOutputs(1);
circuitEncoding.asBuilder().setName(funcname);
auto encodingInfo = Message<concreteprotocol::EncodingInfo>().asBuilder();
encodingInfo.initShape();
auto integer = encodingInfo.getEncoding().initIntegerCiphertext();
auto encodingInfo = Message<concreteprotocol::EncodingInfo>();
encodingInfo.asBuilder().initShape();
auto integer = encodingInfo.asBuilder().getEncoding().initIntegerCiphertext();
integer.getMode().initNative();
integer.setWidth(3);
integer.setIsSigned(false);
inputs.setWithCaveats(0, encodingInfo);
inputs.setWithCaveats(1, encodingInfo);
outputs.setWithCaveats(0, encodingInfo);
inputs.setWithCaveats(0, encodingInfo.asReader());
inputs.setWithCaveats(1, encodingInfo.asReader());
outputs.setWithCaveats(0, encodingInfo.asReader());
options.encodings = Message<concreteprotocol::ProgramEncodingInfo>();
options.encodings->asBuilder().initCircuits(1).setWithCaveats(
@@ -61,14 +61,14 @@ Result<TestProgram> setupTestProgram(std::string source,
TEST(Encodings_unit_tests, multi_key) {
std::string source = R"(
func.func @main(
%arg0: !TFHE.glwe<sk<1,1,2048>>,
%arg0: !TFHE.glwe<sk<1,1,2048>>,
%arg1: !TFHE.glwe<sk<2,1,2048>>
) -> !TFHE.glwe<sk<2,1,2048>> {
%0 = "TFHE.keyswitch_glwe"(%arg0) {key=#TFHE.ksk<sk<1,1,2048>, sk<2, 1,2048>, 7, 2>} : (!TFHE.glwe<sk<1, 1, 2048>>) -> !TFHE.glwe<sk<2, 1, 2048>>
%1 = "TFHE.add_glwe"(%arg1, %0) : (!TFHE.glwe<sk<2,1,2048>>, !TFHE.glwe<sk<2,1,2048>>) -> !TFHE.glwe<sk<2,1,2048>>
return %1 : !TFHE.glwe<sk<2,1,2048>>
}
)";
ASSERT_ASSIGN_OUTCOME_VALUE(circuit, setupTestProgram(source));

View File

@@ -1,3 +1,6 @@
#![allow(clippy::boxed_local)]
#![allow(clippy::too_many_arguments)]
use concrete_optimizer::computing_cost::cpu::CpuComplexity;
use concrete_optimizer::config;
use concrete_optimizer::config::ProcessingUnit;
@@ -42,7 +45,7 @@ fn caches_from(options: ffi::Options) -> decomposition::PersistDecompCaches {
decomposition::cache(
options.security_level,
processing_unit,
Some(ProcessingUnit::Cpu.complexity_model()),
Some(processing_unit.complexity_model()),
options.cache_on_disk,
options.ciphertext_modulus_log,
options.fft_precision,
@@ -612,12 +615,19 @@ impl Dag {
pub struct DagBuilder<'dag>(unparametrized::DagBuilder<'dag>);
impl<'dag> DagBuilder<'dag> {
fn add_input(&mut self, out_precision: Precision, out_shape: &[u64]) -> ffi::OperatorIndex {
fn add_input(
&mut self,
out_precision: Precision,
out_shape: &[u64],
location: &Location,
) -> ffi::OperatorIndex {
let out_shape = Shape {
dimensions_size: out_shape.to_owned(),
};
self.0.add_input(out_precision, out_shape).into()
self.0
.add_input(out_precision, out_shape, location.0.clone())
.into()
}
fn add_lut(
@@ -625,12 +635,15 @@ impl<'dag> DagBuilder<'dag> {
input: ffi::OperatorIndex,
table: &[u64],
out_precision: Precision,
location: &Location,
) -> ffi::OperatorIndex {
let table = FunctionTable {
values: table.to_owned(),
};
self.0.add_lut(input.into(), table, out_precision).into()
self.0
.add_lut(input.into(), table, out_precision, location.0.clone())
.into()
}
#[allow(clippy::boxed_local)]
@@ -638,10 +651,11 @@ impl<'dag> DagBuilder<'dag> {
&mut self,
inputs: &[ffi::OperatorIndex],
weights: Box<Weights>,
location: &Location,
) -> ffi::OperatorIndex {
let inputs: Vec<OperatorIndex> = inputs.iter().copied().map(Into::into).collect();
self.0.add_dot(inputs, weights.0).into()
self.0.add_dot(inputs, weights.0, location.0.clone()).into()
}
fn add_levelled_op(
@@ -649,10 +663,12 @@ impl<'dag> DagBuilder<'dag> {
inputs: &[ffi::OperatorIndex],
lwe_dim_cost_factor: f64,
fixed_cost: f64,
manp: f64,
weights: &[f64],
out_shape: &[u64],
comment: &str,
location: &Location,
) -> ffi::OperatorIndex {
debug_assert!(weights.len() == inputs.len());
let inputs: Vec<OperatorIndex> = inputs.iter().copied().map(Into::into).collect();
let out_shape = Shape {
@@ -665,7 +681,14 @@ impl<'dag> DagBuilder<'dag> {
};
self.0
.add_levelled_op(inputs, complexity, manp, out_shape, comment)
.add_levelled_op(
inputs,
complexity,
weights,
out_shape,
comment,
location.0.clone(),
)
.into()
}
@@ -673,16 +696,22 @@ impl<'dag> DagBuilder<'dag> {
&mut self,
input: ffi::OperatorIndex,
rounded_precision: Precision,
location: &Location,
) -> ffi::OperatorIndex {
self.0.add_round_op(input.into(), rounded_precision).into()
self.0
.add_round_op(input.into(), rounded_precision, location.0.clone())
.into()
}
fn add_unsafe_cast_op(
&mut self,
input: ffi::OperatorIndex,
new_precision: Precision,
location: &Location,
) -> ffi::OperatorIndex {
self.0.add_unsafe_cast(input.into(), new_precision).into()
self.0
.add_unsafe_cast(input.into(), new_precision, location.0.clone())
.into()
}
fn tag_operator_as_output(&mut self, op: ffi::OperatorIndex) {
@@ -694,6 +723,30 @@ impl<'dag> DagBuilder<'dag> {
}
}
#[derive(Clone)]
pub struct Location(operator::Location);
fn location_unknown() -> Box<Location> {
Box::new(Location(operator::Location::Unknown))
}
fn location_from_string(string: &str) -> Box<Location> {
let location: Vec<&str> = string.split(':').collect();
match location[..] {
[file] => Box::new(Location(operator::Location::File(file.into()))),
[file, line] => Box::new(Location(operator::Location::Line(
file.into(),
line.parse().unwrap(),
))),
[file, line, column] => Box::new(Location(operator::Location::LineColumn(
file.into(),
line.parse().unwrap(),
column.parse().unwrap(),
))),
_ => Box::new(Location(operator::Location::Unknown)),
}
}
pub struct Weights(operator::Weights);
fn vector(weights: &[i64]) -> Box<Weights> {
@@ -748,6 +801,14 @@ mod ffi {
type DagBuilder<'dag>;
type Location;
#[namespace = "concrete_optimizer::utils"]
fn location_unknown() -> Box<Location>;
#[namespace = "concrete_optimizer::utils"]
fn location_from_string(string: &str) -> Box<Location>;
#[namespace = "concrete_optimizer::dag"]
fn empty() -> Box<Dag>;
@@ -761,6 +822,7 @@ mod ffi {
self: &mut DagBuilder<'_>,
out_precision: u8,
out_shape: &[u64],
location: &Location,
) -> OperatorIndex;
unsafe fn add_lut(
@@ -768,12 +830,14 @@ mod ffi {
input: OperatorIndex,
table: &[u64],
out_precision: u8,
location: &Location,
) -> OperatorIndex;
unsafe fn add_dot(
self: &mut DagBuilder<'_>,
inputs: &[OperatorIndex],
weights: Box<Weights>,
location: &Location,
) -> OperatorIndex;
unsafe fn add_levelled_op(
@@ -781,21 +845,24 @@ mod ffi {
inputs: &[OperatorIndex],
lwe_dim_cost_factor: f64,
fixed_cost: f64,
manp: f64,
weights: &[f64],
out_shape: &[u64],
comment: &str,
location: &Location,
) -> OperatorIndex;
unsafe fn add_round_op(
self: &mut DagBuilder<'_>,
input: OperatorIndex,
rounded_precision: u8,
location: &Location,
) -> OperatorIndex;
unsafe fn add_unsafe_cast_op(
self: &mut DagBuilder<'_>,
input: OperatorIndex,
rounded_precision: u8,
location: &Location,
) -> OperatorIndex;
unsafe fn tag_operator_as_output(self: &mut DagBuilder<'_>, op: OperatorIndex);
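
As a usage sketch only (not part of the diff): this is roughly how the new `Location` argument is threaded through the C++ side of the cxx bridge. The header name `concrete-optimizer.hpp`, the helper name `add_located_lut`, and the `"main.mlir:12:8"` string are illustrative assumptions; the call signatures follow the declarations added above, and the `*location_unknown()` dereference pattern matches the updated unit tests later in this diff.

// Usage sketch under the assumptions stated above.
#include "concrete-optimizer.hpp"  // assumed name of the cxx-generated header

#include <cstdint>
#include <vector>

concrete_optimizer::dag::OperatorIndex
add_located_lut(concrete_optimizer::DagBuilder &builder) {
  std::vector<uint64_t> shape = {3};
  rust::Slice<const uint64_t> shape_slice(shape.data(), shape.size());

  // Parse a "file[:line[:column]]" string into a Location and attach it
  // to the operator being added...
  auto loc = concrete_optimizer::utils::location_from_string("main.mlir:12:8");
  auto input = builder.add_input(/*out_precision=*/8, shape_slice, *loc);

  // ...or fall back to an unknown location when no source info is available.
  std::vector<uint64_t> table(256, 0);
  rust::Slice<const uint64_t> table_slice(table.data(), table.size());
  return builder.add_lut(input, table_slice, /*out_precision=*/8,
                         *concrete_optimizer::utils::location_unknown());
}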

View File

@@ -943,6 +943,7 @@ struct CircuitKeys;
namespace concrete_optimizer {
struct Dag;
struct DagBuilder;
struct Location;
struct Weights;
enum class Encoding : ::std::uint8_t;
enum class MultiParamStrategy : ::std::uint8_t;
@@ -993,12 +994,12 @@ private:
#define CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
struct DagBuilder final : public ::rust::Opaque {
::rust::String dump() const noexcept;
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept;
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights) noexcept;
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept;
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
void tag_operator_as_output(::concrete_optimizer::dag::OperatorIndex op) noexcept;
~DagBuilder() = delete;
@@ -1011,6 +1012,20 @@ private:
};
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Location
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Location
struct Location final : public ::rust::Opaque {
~Location() = delete;
private:
friend ::rust::layout;
struct layout {
static ::std::size_t size() noexcept;
static ::std::size_t align() noexcept;
};
};
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$Location
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
struct Weights final : public ::rust::Opaque {
@@ -1288,8 +1303,18 @@ extern "C" {
::std::size_t concrete_optimizer$cxxbridge1$Dag$operator$alignof() noexcept;
::std::size_t concrete_optimizer$cxxbridge1$DagBuilder$operator$sizeof() noexcept;
::std::size_t concrete_optimizer$cxxbridge1$DagBuilder$operator$alignof() noexcept;
::std::size_t concrete_optimizer$cxxbridge1$Location$operator$sizeof() noexcept;
::std::size_t concrete_optimizer$cxxbridge1$Location$operator$alignof() noexcept;
} // extern "C"
namespace utils {
extern "C" {
::concrete_optimizer::Location *concrete_optimizer$utils$cxxbridge1$location_unknown() noexcept;
::concrete_optimizer::Location *concrete_optimizer$utils$cxxbridge1$location_from_string(::rust::Str string) noexcept;
} // extern "C"
} // namespace utils
namespace dag {
extern "C" {
::concrete_optimizer::Dag *concrete_optimizer$dag$cxxbridge1$empty() noexcept;
@@ -1303,17 +1328,17 @@ void concrete_optimizer$cxxbridge1$Dag$dump(::concrete_optimizer::Dag const &sel
void concrete_optimizer$cxxbridge1$DagBuilder$dump(::concrete_optimizer::DagBuilder const &self, ::rust::String *return$) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_input(::concrete_optimizer::DagBuilder &self, ::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_input(::concrete_optimizer::DagBuilder &self, ::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_lut(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_lut(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_dot(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::concrete_optimizer::Weights *weights) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_dot(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::concrete_optimizer::Weights *weights, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
void concrete_optimizer$cxxbridge1$DagBuilder$tag_operator_as_output(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex op) noexcept;
@@ -1393,6 +1418,24 @@ namespace utils {
return concrete_optimizer$cxxbridge1$DagBuilder$operator$alignof();
}
::std::size_t Location::layout::size() noexcept {
return concrete_optimizer$cxxbridge1$Location$operator$sizeof();
}
::std::size_t Location::layout::align() noexcept {
return concrete_optimizer$cxxbridge1$Location$operator$alignof();
}
namespace utils {
::rust::Box<::concrete_optimizer::Location> location_unknown() noexcept {
return ::rust::Box<::concrete_optimizer::Location>::from_raw(concrete_optimizer$utils$cxxbridge1$location_unknown());
}
::rust::Box<::concrete_optimizer::Location> location_from_string(::rust::Str string) noexcept {
return ::rust::Box<::concrete_optimizer::Location>::from_raw(concrete_optimizer$utils$cxxbridge1$location_from_string(string));
}
} // namespace utils
namespace dag {
::rust::Box<::concrete_optimizer::Dag> empty() noexcept {
return ::rust::Box<::concrete_optimizer::Dag>::from_raw(concrete_optimizer$dag$cxxbridge1$empty());
@@ -1415,28 +1458,28 @@ namespace dag {
return ::std::move(return$.value);
}
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_input(*this, out_precision, out_shape);
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_input(*this, out_precision, out_shape, location);
}
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_lut(*this, input, table, out_precision);
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_lut(*this, input, table, out_precision, location);
}
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_dot(*this, inputs, weights.into_raw());
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights, ::concrete_optimizer::Location const &location) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_dot(*this, inputs, weights.into_raw(), location);
}
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(*this, inputs, lwe_dim_cost_factor, fixed_cost, manp, out_shape, comment);
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(*this, inputs, lwe_dim_cost_factor, fixed_cost, weights, out_shape, comment, location);
}
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(*this, input, rounded_precision);
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(*this, input, rounded_precision, location);
}
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(*this, input, rounded_precision);
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept {
return concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(*this, input, rounded_precision, location);
}
void DagBuilder::tag_operator_as_output(::concrete_optimizer::dag::OperatorIndex op) noexcept {
@@ -1517,6 +1560,10 @@ namespace weights {
} // namespace concrete_optimizer
extern "C" {
::concrete_optimizer::Location *cxxbridge1$box$concrete_optimizer$Location$alloc() noexcept;
void cxxbridge1$box$concrete_optimizer$Location$dealloc(::concrete_optimizer::Location *) noexcept;
void cxxbridge1$box$concrete_optimizer$Location$drop(::rust::Box<::concrete_optimizer::Location> *ptr) noexcept;
::concrete_optimizer::Dag *cxxbridge1$box$concrete_optimizer$Dag$alloc() noexcept;
void cxxbridge1$box$concrete_optimizer$Dag$dealloc(::concrete_optimizer::Dag *) noexcept;
void cxxbridge1$box$concrete_optimizer$Dag$drop(::rust::Box<::concrete_optimizer::Dag> *ptr) noexcept;
@@ -1605,6 +1652,18 @@ void cxxbridge1$rust_vec$concrete_optimizer$dag$InstructionKeys$truncate(::rust:
namespace rust {
inline namespace cxxbridge1 {
template <>
::concrete_optimizer::Location *Box<::concrete_optimizer::Location>::allocation::alloc() noexcept {
return cxxbridge1$box$concrete_optimizer$Location$alloc();
}
template <>
void Box<::concrete_optimizer::Location>::allocation::dealloc(::concrete_optimizer::Location *ptr) noexcept {
cxxbridge1$box$concrete_optimizer$Location$dealloc(ptr);
}
template <>
void Box<::concrete_optimizer::Location>::drop() noexcept {
cxxbridge1$box$concrete_optimizer$Location$drop(this);
}
template <>
::concrete_optimizer::Dag *Box<::concrete_optimizer::Dag>::allocation::alloc() noexcept {
return cxxbridge1$box$concrete_optimizer$Dag$alloc();
}

View File

@@ -924,6 +924,7 @@ struct CircuitKeys;
namespace concrete_optimizer {
struct Dag;
struct DagBuilder;
struct Location;
struct Weights;
enum class Encoding : ::std::uint8_t;
enum class MultiParamStrategy : ::std::uint8_t;
@@ -974,12 +975,12 @@ private:
#define CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
struct DagBuilder final : public ::rust::Opaque {
::rust::String dump() const noexcept;
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept;
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights) noexcept;
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept;
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
void tag_operator_as_output(::concrete_optimizer::dag::OperatorIndex op) noexcept;
~DagBuilder() = delete;
@@ -992,6 +993,20 @@ private:
};
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Location
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Location
struct Location final : public ::rust::Opaque {
~Location() = delete;
private:
friend ::rust::layout;
struct layout {
static ::std::size_t size() noexcept;
static ::std::size_t align() noexcept;
};
};
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$Location
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
struct Weights final : public ::rust::Opaque {
@@ -1258,6 +1273,10 @@ namespace utils {
::concrete_optimizer::dag::DagSolution convert_to_dag_solution(::concrete_optimizer::v0::Solution const &solution) noexcept;
::concrete_optimizer::dag::CircuitSolution convert_to_circuit_solution(::concrete_optimizer::dag::DagSolution const &solution, ::concrete_optimizer::Dag const &dag) noexcept;
::rust::Box<::concrete_optimizer::Location> location_unknown() noexcept;
::rust::Box<::concrete_optimizer::Location> location_from_string(::rust::Str string) noexcept;
} // namespace utils
namespace dag {

View File

@@ -51,7 +51,7 @@ TEST test_dag_no_lut() {
std::vector<uint64_t> shape = {3};
concrete_optimizer::dag::OperatorIndex node1 =
builder->add_input(PRECISION_8B, slice(shape));
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
std::vector<concrete_optimizer::dag::OperatorIndex> inputs = {node1};
@@ -60,7 +60,7 @@ TEST test_dag_no_lut() {
rust::cxxbridge1::Box<concrete_optimizer::Weights> weights =
concrete_optimizer::weights::vector(slice(weight_vec));
auto id = builder->add_dot(slice(inputs), std::move(weights));
auto id = builder->add_dot(slice(inputs), std::move(weights), *concrete_optimizer::utils::location_unknown());
builder->tag_operator_as_output(id);
auto solution = dag->optimize(default_options());
@@ -75,10 +75,10 @@ TEST test_dag_lut() {
std::vector<uint64_t> shape = {3};
concrete_optimizer::dag::OperatorIndex input =
builder->add_input(PRECISION_8B, slice(shape));
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
std::vector<u_int64_t> table = {};
auto id = builder->add_lut(input, slice(table), PRECISION_8B);
auto id = builder->add_lut(input, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
builder->tag_operator_as_output(id);
auto solution = dag->optimize(default_options());
@@ -94,10 +94,10 @@ TEST test_dag_lut_wop() {
std::vector<uint64_t> shape = {3};
concrete_optimizer::dag::OperatorIndex input =
builder->add_input(PRECISION_16B, slice(shape));
builder->add_input(PRECISION_16B, slice(shape), *concrete_optimizer::utils::location_unknown());
std::vector<u_int64_t> table = {};
auto id = builder->add_lut(input, slice(table), PRECISION_16B);
auto id = builder->add_lut(input, slice(table), PRECISION_16B, *concrete_optimizer::utils::location_unknown());
builder->tag_operator_as_output(id);
auto solution = dag->optimize(default_options());
@@ -113,10 +113,10 @@ TEST test_dag_lut_force_wop() {
std::vector<uint64_t> shape = {3};
concrete_optimizer::dag::OperatorIndex input =
builder->add_input(PRECISION_8B, slice(shape));
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
std::vector<u_int64_t> table = {};
auto id = builder->add_lut(input, slice(table), PRECISION_8B);
auto id = builder->add_lut(input, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
builder->tag_operator_as_output(id);
auto options = default_options();
@@ -133,10 +133,10 @@ TEST test_multi_parameters_1_precision() {
std::vector<uint64_t> shape = {3};
concrete_optimizer::dag::OperatorIndex input =
builder->add_input(PRECISION_8B, slice(shape));
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
std::vector<u_int64_t> table = {};
auto id = builder->add_lut(input, slice(table), PRECISION_8B);
auto id = builder->add_lut(input, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
builder->tag_operator_as_output(id);
auto options = default_options();
@@ -160,14 +160,14 @@ TEST test_multi_parameters_2_precision() {
std::vector<uint64_t> shape = {3};
concrete_optimizer::dag::OperatorIndex input1 =
builder->add_input(PRECISION_8B, slice(shape));
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
concrete_optimizer::dag::OperatorIndex input2 =
builder->add_input(PRECISION_1B, slice(shape));
builder->add_input(PRECISION_1B, slice(shape), *concrete_optimizer::utils::location_unknown());
std::vector<u_int64_t> table = {};
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B);
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B);
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
std::vector<concrete_optimizer::dag::OperatorIndex> inputs = {lut1, lut2};
@@ -176,7 +176,7 @@ TEST test_multi_parameters_2_precision() {
rust::cxxbridge1::Box<concrete_optimizer::Weights> weights =
concrete_optimizer::weights::vector(slice(weight_vec));
auto id = builder->add_dot(slice(inputs), std::move(weights));
auto id = builder->add_dot(slice(inputs), std::move(weights), *concrete_optimizer::utils::location_unknown());
builder->tag_operator_as_output(id);
auto options = default_options();
@@ -201,14 +201,14 @@ TEST test_multi_parameters_2_precision_crt() {
std::vector<uint64_t> shape = {3};
concrete_optimizer::dag::OperatorIndex input1 =
builder->add_input(PRECISION_8B, slice(shape));
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
concrete_optimizer::dag::OperatorIndex input2 =
builder->add_input(PRECISION_1B, slice(shape));
builder->add_input(PRECISION_1B, slice(shape), *concrete_optimizer::utils::location_unknown());
std::vector<u_int64_t> table = {};
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B);
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B);
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
std::vector<concrete_optimizer::dag::OperatorIndex> inputs = {lut1, lut2};
@@ -217,7 +217,7 @@ TEST test_multi_parameters_2_precision_crt() {
rust::cxxbridge1::Box<concrete_optimizer::Weights> weights =
concrete_optimizer::weights::vector(slice(weight_vec));
auto id = builder->add_dot(slice(inputs), std::move(weights));
auto id = builder->add_dot(slice(inputs), std::move(weights), *concrete_optimizer::utils::location_unknown());
builder->tag_operator_as_output(id);
auto options = default_options();

View File

@@ -0,0 +1,28 @@
use std::{fmt::Display, path::PathBuf};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Location {
Unknown,
File(PathBuf),
Line(PathBuf, usize),
LineColumn(PathBuf, usize, usize),
}
impl Display for Location {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Unknown => write!(f, "unknown location"),
Self::File(file) => write!(f, "{}", file.file_name().unwrap().to_str().unwrap()),
Self::Line(file, line) => {
write!(f, "{}:{line}", file.file_name().unwrap().to_str().unwrap())
}
Self::LineColumn(file, line, column) => {
write!(
f,
"{}:{line}:{column}",
file.file_name().unwrap().to_str().unwrap()
)
}
}
}
}
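A minimal sketch of a unit test one might place alongside this module to pin down the `Display` behaviour shown above. The test name and its placement in a `tests` submodule are assumptions; the expected strings follow directly from the `fmt` implementation, which prints only the final path component via `file_name()`.

```rust
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn display_prints_file_name_line_and_column() {
        // Only the last path component is shown, per `file_name()` above.
        assert_eq!(Location::Unknown.to_string(), "unknown location");
        assert_eq!(
            Location::File(PathBuf::from("/src/main.rs")).to_string(),
            "main.rs"
        );
        assert_eq!(
            Location::Line(PathBuf::from("/src/main.rs"), 42).to_string(),
            "main.rs:42"
        );
        assert_eq!(
            Location::LineColumn(PathBuf::from("/src/main.rs"), 42, 7).to_string(),
            "main.rs:42:7"
        );
    }
}
```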

View File

@@ -1,8 +1,10 @@
#![allow(clippy::module_inception)]
pub mod dot_kind;
pub mod location;
pub mod operator;
pub mod tensor;
pub use self::dot_kind::*;
pub use self::location::*;
pub use self::operator::*;
pub use self::tensor::*;

View File

@@ -3,6 +3,7 @@ use std::iter::{empty, once};
use std::ops::Deref;
use crate::dag::operator::tensor::{ClearTensor, Shape};
use crate::optimization::dag::multi_parameters::partition_cut::ExternalPartition;
use super::DotKind;
@@ -89,7 +90,7 @@ pub enum Operator {
LevelledOp {
inputs: Vec<OperatorIndex>,
complexity: LevelledComplexity,
manp: f64,
weights: Vec<f64>,
out_shape: Shape,
comment: String,
},
@@ -104,6 +105,11 @@ pub enum Operator {
input: OperatorIndex,
out_precision: Precision,
},
ChangePartition {
input: OperatorIndex,
src_partition: Option<ExternalPartition>,
dst_partition: Option<ExternalPartition>,
},
}
impl Operator {
@@ -114,7 +120,8 @@ impl Operator {
Self::LevelledOp { inputs, .. } | Self::Dot { inputs, .. } => Box::new(inputs.iter()),
Self::UnsafeCast { input, .. }
| Self::Lut { input, .. }
| Self::Round { input, .. } => Box::new(once(input)),
| Self::Round { input, .. }
| Self::ChangePartition { input, .. } => Box::new(once(input)),
}
}
}
@@ -171,7 +178,7 @@ impl fmt::Display for Operator {
}
Self::LevelledOp {
inputs,
manp,
weights,
out_shape,
..
} => {
@@ -182,7 +189,7 @@ impl fmt::Display for Operator {
}
write!(f, "%{}", input.0)?;
}
write!(f, "] : manp={manp} x {out_shape:?}")?;
write!(f, "] : weights={weights:?}, out_shape={out_shape:?}")?;
}
Self::Round {
input,
@@ -190,6 +197,23 @@ impl fmt::Display for Operator {
} => {
write!(f, "ROUND[%{}] : u{out_precision}", input.0)?;
}
Self::ChangePartition {
input,
src_partition,
dst_partition,
} => {
write!(f, "CHANGE_PARTITION[%{}] : {{", input.0)?;
if let Some(partition) = src_partition {
write!(f, "src_partition: {}", partition.name)?;
}
if let Some(partition) = dst_partition {
if src_partition.is_some() {
write!(f, ", ")?;
}
write!(f, "dst_partition: {}", partition.name)?;
}
write!(f, "}}")?;
}
}
Ok(())
}
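To make the new `ChangePartition` rendering concrete, here is a small self-contained sketch that reproduces the formatting logic of the `Display` arm above. It uses a hypothetical `Partition { name }` stand-in for `ExternalPartition` (whose full definition is not part of this diff, only its `name` field is used here), and the operator index and partition names are made up for illustration.

```rust
use std::fmt::Write;

// Hypothetical stand-in for `ExternalPartition`; only `name` matters
// for the CHANGE_PARTITION rendering shown above.
struct Partition {
    name: String,
}

// Mirrors the Display arm: either side is optional, and a comma is
// emitted only when both src and dst partitions are present.
fn format_change_partition(
    input_index: usize,
    src_partition: Option<&Partition>,
    dst_partition: Option<&Partition>,
) -> String {
    let mut out = String::new();
    write!(out, "CHANGE_PARTITION[%{input_index}] : {{").unwrap();
    if let Some(partition) = src_partition {
        write!(out, "src_partition: {}", partition.name).unwrap();
    }
    if let Some(partition) = dst_partition {
        if src_partition.is_some() {
            write!(out, ", ").unwrap();
        }
        write!(out, "dst_partition: {}", partition.name).unwrap();
    }
    write!(out, "}}").unwrap();
    out
}

fn main() {
    let tfhe = Partition { name: "tfhe".into() };
    let native = Partition { name: "native".into() };
    assert_eq!(
        format_change_partition(3, Some(&tfhe), Some(&native)),
        "CHANGE_PARTITION[%3] : {src_partition: tfhe, dst_partition: native}"
    );
    assert_eq!(
        format_change_partition(3, None, Some(&native)),
        "CHANGE_PARTITION[%3] : {dst_partition: native}"
    );
}
```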

Some files were not shown because too many files have changed in this diff.