Mirror of https://github.com/zama-ai/concrete.git
Synced 2026-01-09 12:57:55 -05:00

chore(common): Merge branch 'main' into release/2.7.x

.github/workflows/block_merge.yml (vendored, 2 changed lines)
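
Most of the workflow edits in this merge pin third-party GitHub Actions to full commit SHAs instead of floating tags, keeping the human-readable version as a trailing comment. A minimal illustrative sketch of that pattern, reusing the checkout pin that appears throughout the diff below (not an excerpt of any single file):

steps:
  - name: Checkout
    # before: a mutable tag that can be repointed upstream
    # uses: actions/checkout@v3
    # after: pinned to an immutable commit, tag kept as a comment
    uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
    with:
      fetch-depth: 0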
@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: Check first line
uses: gsactions/commit-message-checker@v1
uses: gsactions/commit-message-checker@16fa2d5de096ae0d35626443bcd24f1e756cafee # v2.0.0
with:
pattern: '^(feat|fix|test|bench|docs|chore|refactor|perf)\((compiler|backend|frontend|optimizer|tools|ci|common).*\): '
flags: 'gs'

.github/workflows/compiler_benchmark.yml (vendored, 13 changed lines)

@@ -47,13 +47,8 @@ jobs:
run: |
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

# SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}

- name: Fetch submodules
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
submodules: recursive

@@ -100,7 +95,7 @@ jobs:
make ${{ env.BENCHMARK_TARGET }}

- name: Upload raw results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: compiler_${{ github.sha }}_raw
path: compilers/concrete-compiler/compiler/benchmarks_results.json

@@ -120,13 +115,13 @@ jobs:
--throughput

- name: Upload parsed results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: compiler_${{ github.sha }}
path: ${{ env.RESULTS_FILENAME }}

- name: Checkout Slab repo
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: zama-ai/slab
path: slab

@@ -45,12 +45,6 @@ jobs:
echo "Request ID: ${{ inputs.request_id }}"
echo "Matrix item: ${{ inputs.matrix_item }}"

# A SSH private key is required as some dependencies are from private repos
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}

- name: Set up env
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

@@ -58,7 +52,7 @@ jobs:
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"

- name: Fetch repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

@@ -70,7 +64,7 @@ jobs:
run: mkdir build

- name: Build compiler
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler
with:
registry: ghcr.io

@@ -86,16 +80,16 @@ jobs:
${{ env.DOCKER_GPU_OPTION }}
shell: bash
run: |
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
set -e
cd /concrete/compilers/concrete-compiler/compiler
rm -rf /build/*
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build all build-end-to-end-dataflow-tests
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build all
echo "Debug: ccache statistics (after the build):"
ccache -s

- name: Build compiler Dialects docs and check diff
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-docs
with:
registry: ghcr.io

@@ -127,7 +121,7 @@ jobs:
run: echo "MINIMAL_TESTS=ON" >> $GITHUB_ENV

- name: Test compiler
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
with:
registry: ghcr.io
image: ${{ env.DOCKER_IMAGE_TEST }}

@@ -140,13 +134,13 @@ jobs:
shell: bash
run: |
set -e
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
cd /concrete/compilers/concrete-compiler/compiler
pip install pytest
dnf install -y libzstd libzstd-devel
sed "s/pytest/python -m pytest/g" -i Makefile
mkdir -p /tmp/concrete_compiler/gpu_tests/
make MINIMAL_TESTS=${{ env.MINIMAL_TESTS }} DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build run-tests run-end-to-end-dataflow-tests
make MINIMAL_TESTS=${{ env.MINIMAL_TESTS }} DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build run-tests
chmod -R ugo+rwx /tmp/KeySetCache

- name: Analyze logs

@@ -155,7 +149,7 @@ jobs:
ls -1 | xargs grep -H "WARNING RETRY" | sed -e "s/.log.*//g" | uniq -c | sed -re "s/ *([0-9]*) (.*)/::warning ::Test \2 retried \1 times/g" | cat

# - name: Archive python package
# uses: actions/upload-artifact@v3
# uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
# with:
# name: concrete-compiler.whl
# path: build/wheels/concrete_compiler-*-manylinux_{{ env.GLIB_VER }}_x86_64.whl

@@ -47,7 +47,7 @@ jobs:
docker system prune -af

- name: Fetch repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
submodules: recursive

@@ -74,13 +74,14 @@ jobs:
cd compilers/concrete-compiler/compiler
rm -rf /shared/build
make HPX_DIR=/shared/hpx install-hpx-from-source
make HPX_DIR=/shared/hpx BUILD_DIR=/shared/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=ON BINDINGS_PYTHON_ENABLED=OFF CUDA_SUPPORT=${{ env.CUDA_SUPPORT }} build-end-to-end-dataflow-tests
make HPX_DIR=/shared/hpx BUILD_DIR=/shared/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=ON BINDINGS_PYTHON_ENABLED=OFF CUDA_SUPPORT=${{ env.CUDA_SUPPORT }} build-end-to-end-tests

- name: Run end-to-end benchmarks
run: |
set -e
cd compilers/concrete-compiler/compiler
make BUILD_DIR=/shared/build run-end-to-end-distributed-tests
rm -rf /shared/KeyCache
make BUILD_DIR=/shared/build KEY_CACHE_DIRECTORY=/shared/KeyCache run-end-to-end-distributed-tests

- name: Instance cleanup
run: |

@@ -45,12 +45,6 @@ jobs:
echo "Request ID: ${{ inputs.request_id }}"
echo "Matrix item: ${{ inputs.matrix_item }}"

# A SSH private key is required as some dependencies are from private repos
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}

- name: Set up env
# "Install rust" step require root user to have a HOME directory which is not set.
run: |

@@ -58,7 +52,7 @@ jobs:
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"

- name: Fetch repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

@@ -70,7 +64,7 @@ jobs:
run: mkdir build

- name: Build and test compiler
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler
with:
registry: ghcr.io

@@ -86,7 +80,7 @@ jobs:
--gpus all
shell: bash
run: |
rustup toolchain install nightly-2024-01-31
rustup toolchain install nightly-2024-07-01
set -e
cd /concrete/compilers/concrete-compiler/compiler
rm -rf /build/*

@@ -8,7 +8,7 @@ jobs:
FormattingAndLinting:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Format with clang-format (Cpp)
run: |
sudo apt install moreutils

@@ -34,6 +34,6 @@ jobs:
CheckLicense:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Check if sources include the license header
run: .github/workflows/scripts/check_for_license.sh

@@ -24,12 +24,7 @@ jobs:
runson: ["aws-mac1-metal", "aws-mac2-metal"]
runs-on: ${{ matrix.runson }}
steps:
# A SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}

- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

@@ -45,7 +40,7 @@ jobs:

- name: Cache compilation (push)
if: github.event_name == 'push'
uses: actions/cache@v3
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: /Users/runner/Library/Caches/ccache
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.sha }}

@@ -54,7 +49,7 @@ jobs:

- name: Cache compilation (pull_request)
if: github.event_name == 'pull_request'
uses: actions/cache@v3
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: /Users/runner/Library/Caches/ccache
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.event.pull_request.base.sha }}

@@ -49,12 +49,8 @@ jobs:
echo "AMI: ${{ inputs.instance_image_id }}"
echo "Type: ${{ inputs.instance_type }}"
echo "Request ID: ${{ inputs.request_id }}"
# SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}

- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

@@ -66,7 +62,6 @@ jobs:
- name: Build Image
run: |
DOCKER_BUILDKIT=1 docker build --no-cache \
--ssh default=${{ env.SSH_AUTH_SOCK }} \
--label "commit-sha=${{ github.sha }}" -t ${{ matrix.image }} -f ${{ matrix.dockerfile }} .

- name: Tag and Publish Image

@@ -88,7 +83,7 @@ jobs:
IMAGE: ghcr.io/zama-ai/hpx

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0

@@ -98,7 +93,7 @@ jobs:

- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c # v44.5.24

- name: Login
id: login

@@ -127,7 +122,7 @@ jobs:
dockerfile: docker/Dockerfile.cuda-118-env

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0

@@ -137,7 +132,7 @@ jobs:

- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v44
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c # v44.5.24

- name: Login
id: login


.github/workflows/concrete_cpu_test.yml (vendored, 16 changed lines)

@@ -16,17 +16,17 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Rust install
uses: actions-rs/toolchain@v1
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:
toolchain: nightly-2024-01-31
toolchain: nightly-2024-07-01
override: true
components: rustfmt, clippy

- name: Download cargo cache
uses: Swatinem/rust-cache@v2
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3

- name: Formatting
run: |

@@ -63,13 +63,13 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Setup rust toolchain for concrete-cpu
uses: ./.github/workflows/setup_rust_toolchain_for_concrete_cpu

- name: Download cargo cache
uses: Swatinem/rust-cache@v2
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3

- name: Tests
run: |

@@ -81,13 +81,13 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Setup rust toolchain for concrete-cpu
uses: ./.github/workflows/setup_rust_toolchain_for_concrete_cpu

- name: Download cargo cache
uses: Swatinem/rust-cache@v2
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3

- name: Tests
run: |


.github/workflows/concrete_ml_tests.yml (vendored, 11 changed lines)

@@ -38,11 +38,6 @@ jobs:
echo "Request ID: ${{ inputs.request_id }}"
echo "User Inputs: ${{ inputs.user_inputs }}"

- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}

- name: Set up GitHub environment
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

@@ -50,7 +45,7 @@ jobs:
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"

- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

@@ -65,7 +60,7 @@ jobs:
run: mkdir build

- name: Build wheel
uses: addnab/docker-run-action@v3
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-bindings
with:
registry: ghcr.io

@@ -104,7 +99,7 @@ jobs:
deactivate

- name: Setup Python
uses: actions/setup-python@v5
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
with:
python-version: ${{ matrix.python-version }}


.github/workflows/concrete_python_benchmark.yml (vendored, new file, 125 lines)

@@ -0,0 +1,125 @@
name: Concrete Python Benchmark

on:
workflow_dispatch:
schedule:
- cron: "0 1 * * SAT"

env:
DOCKER_IMAGE: ghcr.io/zama-ai/concrete-compiler
GLIB_VER: 2_28

jobs:
setup-instance:
name: Setup Instance
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: m7i-cpu-bench

concrete-python-benchmarks:
name: Run Concrete Python Benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
submodules: recursive
fetch-depth: 0

- name: Benchmark
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
id: build-compiler-bindings
with:
registry: ghcr.io
image: ${{ env.DOCKER_IMAGE }}
username: ${{ secrets.GHCR_LOGIN }}
password: ${{ secrets.GHCR_PASSWORD }}
options: >-
-v ${{ github.workspace }}:/concrete
-v ${{ github.workspace }}/build:/build
-v ${{ env.SSH_AUTH_SOCK }}:/ssh.socket
-e SSH_AUTH_SOCK=/ssh.socket
${{ env.DOCKER_GPU_OPTION }}
shell: bash
run: |
set -e

rustup toolchain install nightly-2024-07-01
rm -rf /build/*

export PYTHON=${{ format('python{0}', matrix.python-version) }}
echo "Using $PYTHON"

dnf -y install graphviz graphviz-devel

cd /concrete/frontends/concrete-python
make PYTHON=$PYTHON venv
source .venv/bin/activate

cd /concrete/compilers/concrete-compiler/compiler
make BUILD_DIR=/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=ON Python3_EXECUTABLE=$(which python) python-bindings

echo "Debug: ccache statistics (after the build):"
ccache -s

cd /concrete/frontends/concrete-python

export COMPILER_BUILD_DIRECTORY="/build"
export PROGRESS_MACHINE_NAME="m7i.48xlarge"

make benchmark
make process-benchmark-results-for-grafana

deactivate

- name: Checkout Slab repo
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

- name: Send data to Slab
shell: bash
run: |
echo "Computing HMac on results file"
SIGNATURE="$(slab/scripts/hmac_calculator.sh frontends/concrete-python/progress.processed.json '${{ secrets.JOB_SECRET }}')"

cd frontends/concrete-python

echo "Sending results to Slab..."
curl -v -k \
-H "Content-Type: application/json" \
-H "X-Slab-Repository: ${{ github.repository }}" \
-H "X-Slab-Command: store_data_v2" \
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-d @progress.processed.json \
${{ secrets.SLAB_URL }}

teardown-instance:
name: Teardown Instance
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, concrete-python-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}


.github/workflows/concrete_python_checks.yml (vendored, 2 changed lines)

@@ -7,7 +7,7 @@ jobs:
Checks:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Pre-Commit Checks
run: |
sudo apt install -y graphviz libgraphviz-dev

@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Get version from tag
run: |

@@ -34,7 +34,7 @@ jobs:
docker image build -t ${{ env.DOCKER_IMAGE_NAME }} --build-arg version=${{ env.VERSION }} -f ${{ env.DOCKER_FILE }} empty_context

- name: Login to Docker Hub
uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}


.github/workflows/concrete_python_release.yml (vendored, 36 changed lines)

@@ -50,7 +50,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
run: cp frontends/concrete-python/version.txt frontends/concrete-python/concrete/fhe/version.py
|
||||
|
||||
- name: Build wheel
|
||||
uses: addnab/docker-run-action@v3
|
||||
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
|
||||
id: build-compiler-bindings
|
||||
with:
|
||||
registry: ghcr.io
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
run: |
|
||||
set -e
|
||||
|
||||
rustup toolchain install nightly-2024-01-31
|
||||
rustup toolchain install nightly-2024-07-01
|
||||
rm -rf /build/*
|
||||
|
||||
export PYTHON=${{ format('python{0}', matrix.python-version) }}
|
||||
@@ -116,7 +116,7 @@ jobs:
|
||||
deactivate
|
||||
|
||||
- name: Upload wheel
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
|
||||
path: frontends/concrete-python/dist/*manylinux*.whl
|
||||
@@ -131,7 +131,7 @@ jobs:
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
@@ -193,7 +193,7 @@ jobs:
|
||||
deactivate
|
||||
|
||||
- name: Upload wheel
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: ${{ format('wheel-{0}-{1}', matrix.python-version, matrix.runs-on) }}
|
||||
path: frontends/concrete-python/dist/*macos*.whl
|
||||
@@ -206,7 +206,7 @@ jobs:
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
|
||||
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
path: frontends/concrete-python/dist
|
||||
pattern: wheel-*
|
||||
@@ -230,20 +230,20 @@ jobs:
|
||||
needs: [build-linux-x86, build-macos, provenance]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/download-artifact@v4
|
||||
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
path: wheels
|
||||
pattern: 'wheel-*'
|
||||
merge-multiple: true
|
||||
- uses: actions/download-artifact@v4
|
||||
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
pattern: '*.intoto.jsonl'
|
||||
# When building a new tag, create a new draft release.
|
||||
# When building a new public tag, create a new draft release.
|
||||
- name: create draft release
|
||||
if: ${{ env.RELEASE_TYPE == 'public' || env.RELEASE_TYPE == 'nightly' }}
|
||||
if: ${{ env.RELEASE_TYPE == 'public'}}
|
||||
run: |
|
||||
export TAG=$(git describe --tags --abbrev=0)
|
||||
echo $TAG
|
||||
@@ -278,7 +278,7 @@ jobs:
|
||||
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
https://api.github.com/repos/zama-ai/concrete/actions/workflows/concrete_python_push_docker_image.yml/dispatches \
|
||||
-d "{\"ref\": \"$TAG\", \"inputs\": {\"tag\":\"$TAG\"}}"
|
||||
-d "{\"ref\": \"$TAG\", \"inputs\": {\"tag\":\"v$TAG\"}}"
|
||||
|
||||
test-linux-x86:
|
||||
needs: [build-linux-x86]
|
||||
@@ -289,16 +289,16 @@ jobs:
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
steps:
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Download wheels
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
name: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
|
||||
path: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
submodules: recursive
|
||||
path: repo
|
||||
@@ -339,12 +339,12 @@ jobs:
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
steps:
|
||||
- name: Download wheels
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
name: ${{ format('wheel-{0}-{1}', matrix.python-version, matrix.runs-on) }}
|
||||
path: ${{ format('wheel-{0}-{1}', matrix.python-version, matrix.runs-on) }}
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
submodules: recursive
|
||||
path: repo
|
||||
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
run: cp frontends/concrete-python/version.txt frontends/concrete-python/concrete/fhe/version.py
|
||||
|
||||
- name: Build wheel
|
||||
uses: addnab/docker-run-action@v3
|
||||
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
|
||||
id: build-compiler-bindings
|
||||
with:
|
||||
registry: ghcr.io
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
run: |
|
||||
set -e
|
||||
|
||||
rustup toolchain install nightly-2024-01-31
|
||||
rustup toolchain install nightly-2024-07-01
|
||||
rm -rf /build/*
|
||||
|
||||
export PYTHON=${{ format('python{0}', matrix.python-version) }}
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
|
||||
cd /concrete/compilers/concrete-compiler/compiler
|
||||
make BUILD_DIR=/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=OFF Python3_EXECUTABLE=$(which python) \
|
||||
CUDA_SUPPORT=ON CUDA_PATH=${{ env.CUDA_PATH }} python-bindings
|
||||
CUDA_SUPPORT=ON TIMING_ENABLED=ON CUDA_PATH=${{ env.CUDA_PATH }} python-bindings
|
||||
|
||||
echo "Debug: ccache statistics (after the build):"
|
||||
ccache -s
|
||||
@@ -117,7 +117,7 @@ jobs:
|
||||
deactivate
|
||||
|
||||
- name: Upload wheel
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: ${{ format('wheel-{0}-linux-x86', matrix.python-version) }}
|
||||
path: frontends/concrete-python/dist/*manylinux*.whl
|
||||
@@ -126,10 +126,12 @@ jobs:
|
||||
push:
|
||||
needs: [build-linux-x86]
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
wheel_version: ${{ steps.version.outputs.wheel_version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
|
||||
- uses: actions/download-artifact@v4
|
||||
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
path: wheels
|
||||
merge-multiple: true
|
||||
@@ -155,10 +157,17 @@ jobs:
|
||||
# update indexes and invalidate cloudfront cache
|
||||
python .github/workflows/scripts/s3_update_html_indexes.py
|
||||
|
||||
- name: Output Wheel Version
|
||||
id: version
|
||||
run: |
|
||||
export VERSION=`ls ./wheels/*manylinux* | head -n1 | cut -d "-" -f2`
|
||||
echo "VERSION=$VERSION"
|
||||
echo "wheel_version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
|
||||
test-gpu-wheel:
|
||||
needs: [push]
|
||||
uses: ./.github/workflows/start_slab.yml
|
||||
secrets: inherit
|
||||
with:
|
||||
command: concrete-python-test-gpu-wheel
|
||||
user_inputs: "TODO"
|
||||
user_inputs: "${{ needs.push.outputs.wheel_version }}"
|
||||
|
||||
@@ -35,16 +35,15 @@ jobs:
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
steps:
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
# TODO: specify CP version
|
||||
- name: Install CP
|
||||
run: pip install --pre --extra-index-url https://pypi.zama.ai/gpu/ concrete-python
|
||||
run: pip install --pre --extra-index-url https://pypi.zama.ai/gpu/ "concrete-python==${{ env.CP_VERSION }}"
|
||||
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
path: repo
|
||||
|
||||
|
||||

.github/workflows/concrete_python_test_macos.yml (vendored, 11 changed lines)

@@ -21,13 +21,8 @@ jobs:
|
||||
|
||||
runs-on: ${{ matrix.machine }}
|
||||
steps:
|
||||
- name: Set up SSH agent
|
||||
uses: webfactory/ssh-agent@v0.7.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
submodules: recursive
|
||||
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
|
||||
@@ -41,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Cache Compilation (push)
|
||||
if: github.event_name == 'push'
|
||||
uses: actions/cache@v3
|
||||
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
|
||||
with:
|
||||
path: /Users/runner/Library/Caches/ccache
|
||||
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.sha }}
|
||||
@@ -50,7 +45,7 @@ jobs:
|
||||
|
||||
- name: Cache Compilation (pull_request)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: actions/cache@v3
|
||||
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
|
||||
with:
|
||||
path: /Users/runner/Library/Caches/ccache
|
||||
key: ${{ runner.os }}-${{ runner.arch }}-compilation-cache-${{ github.event.pull_request.base.sha }}
|
||||
|
||||
@@ -42,11 +42,6 @@ jobs:
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Set up SSH agent
|
||||
uses: webfactory/ssh-agent@v0.7.0
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}
|
||||
|
||||
- name: Set up GitHub environment
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
@@ -54,7 +49,7 @@ jobs:
|
||||
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
submodules: recursive
|
||||
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
|
||||
@@ -66,7 +61,7 @@ jobs:
|
||||
uses: ./.github/workflows/setup_rust_toolchain_for_concrete_cpu
|
||||
|
||||
- name: Build bindings
|
||||
uses: addnab/docker-run-action@v3
|
||||
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
|
||||
if: ${{ !contains(inputs.instance_type, 'p3') }}
|
||||
id: build-compiler-bindings
|
||||
with:
|
||||
@@ -81,7 +76,7 @@ jobs:
|
||||
-e SSH_AUTH_SOCK=/ssh.socket
|
||||
shell: bash
|
||||
run: |
|
||||
rustup toolchain install nightly-2024-01-31
|
||||
rustup toolchain install nightly-2024-07-01
|
||||
set -e
|
||||
rm -rf /build/*
|
||||
|
||||
@@ -98,7 +93,7 @@ jobs:
|
||||
ccache -s
|
||||
|
||||
- name: Test
|
||||
uses: addnab/docker-run-action@v3
|
||||
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
|
||||
if: ${{ !contains(inputs.instance_type, 'p3') }}
|
||||
with:
|
||||
registry: ghcr.io
|
||||
@@ -124,7 +119,7 @@ jobs:
|
||||
KEY_CACHE_DIRECTORY=./KeySetCache make pytest
|
||||
|
||||
- name: Build bindings gpu
|
||||
uses: addnab/docker-run-action@v3
|
||||
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
|
||||
if: ${{ contains(inputs.instance_type, 'p3') }}
|
||||
id: build-compiler-bindings-gpu
|
||||
with:
|
||||
@@ -155,7 +150,7 @@ jobs:
|
||||
ccache -s
|
||||
|
||||
- name: Test gpu
|
||||
uses: addnab/docker-run-action@v3
|
||||
uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
|
||||
if: ${{ contains(inputs.instance_type, 'p3') }}
|
||||
with:
|
||||
registry: ghcr.io
|
||||
|
||||

.github/workflows/linelint.yml (vendored, 2 changed lines)

@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Linelint
uses: fernandrone/linelint@0.0.4
id: linelint


.github/workflows/main.yml (vendored, 32 changed lines)

@@ -39,20 +39,20 @@ jobs:
push-main: ${{ steps.github.outputs.push-main }}
steps:
- name: Checkout the repository
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

- name: Get changed files in the concrete-compiler directory
id: compiler
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./compilers/concrete-compiler/**

- name: Get changed files for concrete-optimizer
id: optimizer
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: |
./compilers/concrete-optimizer/**

@@ -60,43 +60,43 @@ jobs:

- name: Get changed files in the concrete-cpu directory
id: concrete-cpu
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./backends/concrete-cpu/implementation/**

- name: Get changed files in the concrete-python directory
id: concrete-python
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./frontends/concrete-python/**

- name: Check if compiler_build_and_test_cpu workflow has changed
id: concrete-compiler-cpu-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_build_and_test_cpu.yml

- name: Check if compiler_build_and_test_gpu workflow has changed
id: concrete-compiler-gpu-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_build_and_test_gpu.yml

- name: Check if compiler_format_and_linting.yml workflow has changed
id: concrete-compiler-format-and-linting-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_format_and_linting.yml

- name: Check if compiler_macos_build_and_test workflow has changed
id: concrete-compiler-macos-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/compiler_macos_build_and_test.yml

- name: Check if compiler_publish_docker_images workflow has changed
id: concrete-compiler-docker-images-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: |
./.github/workflows/compiler_publish_docker_images.yml

@@ -104,31 +104,31 @@ jobs:

- name: Check if concrete_cpu_test workflow has changed
id: concrete-cpu-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/concrete_cpu_test.yml

- name: Check if concrete_python_checks workflow has changed
id: concrete-python-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/concrete_python_checks.yml

- name: Check if optimizer workflow has changed
id: concrete-optimizer-workflow
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./.github/workflows/optimizer.yml

- name: Get changed files in the concrete-cpu directory
id: concrete-cpu-api
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./backends/concrete-cpu/implementation/include/**

- name: Get changed files in the concrete-cuda directory
id: concrete-cuda-api
uses: tj-actions/changed-files@e5ce44a30190a3af4c81b960587845596e2300ca
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
with:
files: ./backends/concrete-cuda/implementation/include/**

@@ -252,7 +252,7 @@ jobs:
# Concrete-ML tests #############################
concrete-ml-tests-linux:
needs: file-change
if: needs.file-change.outputs.concrete-python == 'true' || needs.file-change.outputs.push-main
if: needs.file-change.outputs.concrete-python == 'true' || needs.file-change.outputs.compiler == 'true' || needs.file-change.outputs.push-main
uses: ./.github/workflows/start_slab.yml
secrets: inherit
with:


.github/workflows/markdown_link_check.yml (vendored, 3 changed lines)

@@ -4,6 +4,7 @@ on:
pull_request:
paths:
- '**.md'
- .github/workflows/markdown_link_check.yml
push:
branches:
- main

@@ -13,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- uses: gaurav-nelson/github-action-markdown-link-check@v1
- uses: gaurav-nelson/github-action-markdown-link-check@5c5dfc0ac2e225883c0e5f03a85311ec2830d368 # v1
with:
use-quiet-mode: 'yes'
use-verbose-mode: 'yes'


.github/workflows/ml_benchmark_subset.yml (vendored, 13 changed lines)

@@ -46,13 +46,8 @@ jobs:
run: |
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

# SSH private key is required as some dependencies are from private repos
- uses: webfactory/ssh-agent@v0.7.0
with:
ssh-private-key: ${{ secrets.CONCRETE_CI_SSH_PRIVATE }}

- name: Fetch submodules
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
submodules: recursive

@@ -86,7 +81,7 @@ jobs:
make BINDINGS_PYTHON_ENABLED=OFF ML_BENCH_SUBSET_ID=${{ inputs.matrix_item }} run-mlbench-subset

- name: Upload raw results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: ${{ github.sha }}_raw
path: compiler/benchmarks_results.json

@@ -105,13 +100,13 @@ jobs:
--bench-date "${{ env.BENCH_DATE }}"

- name: Upload parsed results artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: ${{ github.sha }}
path: ${{ env.RESULTS_FILENAME }}

- name: Checkout Slab repo
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: zama-ai/slab
path: slab


.github/workflows/optimizer.yml (vendored, 10 changed lines)

@@ -24,7 +24,7 @@ jobs:
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: "Setup"
uses: ./.github/workflows/optimizer_setup

@@ -56,7 +56,7 @@ jobs:
benchmarks:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: "Setup"
uses: ./.github/workflows/optimizer_setup

@@ -73,7 +73,7 @@ jobs:
# for artifacts restrictions see https://github.com/actions/download-artifact/issues/3
# for cache restrictions see https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache
# and https://github.com/actions/cache/issues/692
uses: dawidd6/action-download-artifact@v5
uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow_conclusion: ""

@@ -83,7 +83,7 @@ jobs:
path: ./benchmark

- name: Save benchmark result to file
uses: benchmark-action/github-action-benchmark@v1
uses: benchmark-action/github-action-benchmark@4de1bed97a47495fc4c5404952da0499e31f5c29 # v1.20.3
with:
tool: 'cargo'
output-file-path: ./compilers/concrete-optimizer/bench_result.txt

@@ -96,7 +96,7 @@ jobs:
summary-always: true

- name: Upload benchmark data
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
path: ./benchmark
name: ${{ runner.os }}-benchmark


.github/workflows/optimizer_setup/action.yml (vendored, 10 changed lines)

@@ -7,15 +7,9 @@ runs:
using: "composite"
steps:
- name: Rust install
uses: actions-rs/toolchain@v1
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:
toolchain: stable

- name: Download cargo cache
uses: Swatinem/rust-cache@v2

# A SSH private key is required as some dependencies are from private repos
- name: Set ssh keys
uses: webfactory/ssh-agent@v0.6.0
with:
ssh-private-key: ${{ inputs.ssh_private_key }}
uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2.7.3


.github/workflows/refresh_zama_pypi.yml (vendored, 2 changed lines)

@@ -7,7 +7,7 @@ jobs:
refresh:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v3

- name: Update index and invalidate cache
env:

@@ -2,7 +2,7 @@ runs:
using: "composite"
steps:
- name: Install rust
uses: actions-rs/toolchain@v1
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:
toolchain: nightly-2024-01-31
toolchain: nightly-2024-07-01
override: true


.github/workflows/start_slab.yml (vendored, 4 changed lines)

@@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout concrete
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0

@@ -39,7 +39,7 @@ jobs:
echo "GIT_REF=${{ github.ref_name }}" >> $GITHUB_ENV

- name: Checkout Slab repo
uses: actions/checkout@v3
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: zama-ai/slab
path: slab

@@ -129,7 +129,7 @@ assert result == add(2, 6)
- [[Video tutorial] How To Get Started With Concrete - Zama's Fully Homomorphic Encryption Compiler](https://www.zama.ai/post/how-to-started-with-concrete-zama-fully-homomorphic-encryption-compiler)
- [The Encrypted Game of Life in Python Using Concrete](https://www.zama.ai/post/the-encrypted-game-of-life-using-concrete-python)
- [Encrypted Key-value Database Using Homomorphic Encryption](https://www.zama.ai/post/encrypted-key-value-database-using-homomorphic-encryption)
- [SHA-256 Implementation Using Concrete](https://github.com/zama-ai/concrete/blob/main/docs/application-tutorial/sha256.ipynb)
- [SHA-256 Implementation Using Concrete](https://github.com/zama-ai/concrete/blob/main/frontends/concrete-python/examples/sha256/sha256.ipynb)

*Explore more useful resources in [Concrete tutorials](https://docs.zama.ai/concrete/v/main-1/tutorials/see-all-tutorials) and [Awesome Zama repo](https://github.com/zama-ai/awesome-zama?tab=readme-ov-file#concrete). If you have built awesome projects using Concrete, please let us know and we will be happy to showcase them here!*


backends/concrete-cpu/implementation/Cargo.lock (generated, 146 changed lines)

@@ -45,6 +45,15 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@@ -59,9 +68,9 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.13.1"
|
||||
version = "1.16.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
|
||||
checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
@@ -170,14 +179,14 @@ dependencies = [
|
||||
"cbindgen",
|
||||
"concrete-cpu-noise-model",
|
||||
"concrete-csprng",
|
||||
"concrete-fft 0.2.1",
|
||||
"concrete-fft",
|
||||
"concrete-security-curves",
|
||||
"criterion",
|
||||
"dyn-stack",
|
||||
"libc",
|
||||
"num-complex",
|
||||
"once_cell",
|
||||
"pulp 0.10.4",
|
||||
"pulp",
|
||||
"rayon",
|
||||
"readonly",
|
||||
"tfhe",
|
||||
@@ -193,9 +202,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "concrete-csprng"
|
||||
version = "0.4.0"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c7080e711c39c3dda834604f7c31f4634a38b8dddeb9f24db5d7801751415ad"
|
||||
checksum = "90518357249582c16a6b64d7410243dfb3109d5bf0ad1665c058c9a59f2fc4cc"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"libc",
|
||||
@@ -204,29 +213,26 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "concrete-fft"
|
||||
version = "0.2.1"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ab720f85aa80be9d78fba5a941458ec50d97c7856aced76707f0bf227a0c6ca"
|
||||
checksum = "3144f883422ee22c65d4f408c11b3406513eadc0d50b3d65bfadcb97852817e0"
|
||||
dependencies = [
|
||||
"aligned-vec",
|
||||
"bytemuck",
|
||||
"dyn-stack",
|
||||
"num-complex",
|
||||
"pulp 0.11.11",
|
||||
"pulp",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "concrete-fft"
|
||||
version = "0.3.0"
|
||||
name = "concrete-ntt"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdb823cf0c7a83ac8f7e38bcda73d754fb8f57797f342cdd7c20a61a1db8b058"
|
||||
checksum = "b4f4643dbd5de069e099122ae6c2bbd3db70d69ffec348dfc228448d635f949e"
|
||||
dependencies = [
|
||||
"aligned-vec",
|
||||
"bytemuck",
|
||||
"dyn-stack",
|
||||
"num-complex",
|
||||
"pulp 0.11.11",
|
||||
"serde",
|
||||
"pulp",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -387,6 +393,17 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "1.8.2"
|
||||
@@ -500,9 +517,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
version = "0.2.155"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
@@ -527,9 +550,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "num-complex"
|
||||
version = "0.4.3"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d"
|
||||
checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"num-traits",
|
||||
@@ -573,6 +596,12 @@ version = "6.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
|
||||
|
||||
[[package]]
|
||||
name = "plotters"
|
||||
version = "0.3.5"
|
||||
@@ -612,30 +641,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pulp"
|
||||
version = "0.10.4"
|
||||
version = "0.18.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "284c392c810680912400c6f70879a8cde404344db6b68ff52cc3990c020324d1"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulp"
|
||||
version = "0.11.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "866e8018d6397b0717100dd4a7948fc8cbc8c4b8ce3e39e98a0e1e878d3ba925"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulp"
|
||||
version = "0.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50ff10f8f3224a1cab64371fbab0fff9b9cb5892076eb9032dc825c2e50894e4"
|
||||
checksum = "0ec8d02258294f59e4e223b41ad7e81c874aa6b15bc4ced9ba3965826da0eed5"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"libm",
|
||||
"num-complex",
|
||||
"reborrow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -647,6 +660,15 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.7.0"
|
||||
@@ -835,19 +857,47 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
|
||||
|
||||
[[package]]
|
||||
name = "tfhe"
|
||||
version = "0.4.1"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df9c5cc5c4a1bd0bcfc2e7a3e83a8851dab02ff59d63ca8bc17da0e069498f71"
|
||||
checksum = "273f968d322dd02cd76a51fe8e2312b5807631d33e99ea06ccfd8034d558c562"
|
||||
dependencies = [
|
||||
"aligned-vec",
|
||||
"bincode",
|
||||
"bytemuck",
|
||||
"concrete-csprng",
|
||||
"concrete-fft 0.3.0",
|
||||
"concrete-fft",
|
||||
"concrete-ntt",
|
||||
"dyn-stack",
|
||||
"itertools 0.11.0",
|
||||
"pulp 0.13.2",
|
||||
"paste",
|
||||
"pulp",
|
||||
"rand_core",
|
||||
"rayon",
|
||||
"serde",
|
||||
"tfhe-versionable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tfhe-versionable"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7977829d8b2d59a16d9780ffbf8e4084a8d67f32c0e557b647136d094d391b2"
|
||||
dependencies = [
|
||||
"aligned-vec",
|
||||
"num-complex",
|
||||
"serde",
|
||||
"tfhe-versionable-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tfhe-versionable-derive"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "754aede9c522d81d852b46714d42137d307f418b8b6eed4db03d1466080f4f9d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.22",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -897,6 +947,12 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.87"
|
||||
|
||||
@@ -10,18 +10,18 @@ crate-type = ["lib", "staticlib"]
|
||||
|
||||
|
||||
[dependencies]
|
||||
concrete-csprng = { version = "0.4", optional = true, features = [
|
||||
concrete-csprng = { version = "0.4.1", optional = true, features = [
|
||||
"generator_fallback",
|
||||
] }
|
||||
concrete-cpu-noise-model = { path = "../noise-model/" }
|
||||
concrete-security-curves = { path = "../../../tools/parameter-curves/concrete-security-curves-rust" }
|
||||
libc = { version = "0.2", default-features = false }
|
||||
pulp = { version = "0.10", default-features = false }
|
||||
pulp = { version = "0.18.8", default-features = false }
|
||||
dyn-stack = { version = "0.9", default-features = false }
|
||||
readonly = "0.2"
|
||||
aligned-vec = { version = "0.5", default-features = false }
|
||||
concrete-fft = { version = "0.2.1", default-features = false }
|
||||
bytemuck = "1.12"
|
||||
concrete-fft = { version = "0.4.1", default-features = false }
|
||||
bytemuck = "1.14.3"
|
||||
num-complex = { version = "0.4", default-features = false, features = [
|
||||
"bytemuck",
|
||||
] }
|
||||
@@ -29,16 +29,16 @@ num-complex = { version = "0.4", default-features = false, features = [
|
||||
rayon = { version = "1.6", optional = true }
|
||||
once_cell = { version = "1.16", optional = true }
|
||||
|
||||
tfhe = { version = "0.4", features = [] }
|
||||
tfhe = { version = "0.7", features = [] }
|
||||
|
||||
[target.x86_64-unknown-unix-gnu.dependencies]
|
||||
tfhe = { version = "0.4", features = ["x86_64-unix"] }
|
||||
tfhe = { version = "0.7", features = ["x86_64-unix"] }
|
||||
|
||||
[target.aarch64-unknown-unix-gnu.dependencies]
|
||||
tfhe = { version = "0.4", features = ["aarch64-unix"] }
|
||||
tfhe = { version = "0.7", features = ["aarch64-unix"] }
|
||||
|
||||
[target.x86_64-pc-windows-gnu.dependencies]
|
||||
tfhe = { version = "0.4", features = ["x86_64"] }
|
||||
tfhe = { version = "0.7", features = ["x86_64"] }
|
||||
|
||||
[features]
|
||||
default = ["parallel", "std", "csprng"]
|
||||
|
||||
@@ -73,14 +73,14 @@ pub unsafe extern "C" fn concrete_cpu_init_lwe_bootstrap_key_u64(
|
||||
&lwe_sk,
|
||||
&glwe_sk,
|
||||
&mut bsk,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
|
||||
),
|
||||
Parallelism::Rayon => par_generate_lwe_bootstrap_key(
|
||||
&lwe_sk,
|
||||
&glwe_sk,
|
||||
&mut bsk,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
|
||||
),
|
||||
}
|
||||
@@ -151,14 +151,14 @@ pub unsafe extern "C" fn concrete_cpu_init_seeded_lwe_bootstrap_key_u64(
|
||||
&lwe_sk,
|
||||
&glwe_sk,
|
||||
&mut bsk,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
seeder,
|
||||
),
|
||||
Parallelism::Rayon => par_generate_seeded_lwe_bootstrap_key(
|
||||
&lwe_sk,
|
||||
&glwe_sk,
|
||||
&mut bsk,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
seeder,
|
||||
),
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ pub unsafe extern "C" fn concrete_cpu_init_lwe_keyswitch_key_u64(
|
||||
&input_key,
|
||||
&output_key,
|
||||
&mut ksk,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
|
||||
)
|
||||
});
|
||||
@@ -110,7 +110,7 @@ pub unsafe extern "C" fn concrete_cpu_init_seeded_lwe_keyswitch_key_u64(
|
||||
&input_key,
|
||||
&output_key,
|
||||
&mut seeded_ksk,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
seeder,
|
||||
)
|
||||
});
|
||||
|
||||
@@ -53,7 +53,7 @@ pub unsafe extern "C" fn concrete_cpu_encrypt_lwe_ciphertext_u64(
|
||||
&lwe_sk,
|
||||
&mut lwe_out,
|
||||
Plaintext(input),
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
|
||||
);
|
||||
});
|
||||
@@ -95,7 +95,7 @@ pub unsafe extern "C" fn concrete_cpu_encrypt_seeded_lwe_ciphertext_u64(
|
||||
&lwe_sk,
|
||||
&mut seeded_lwe_ciphertext,
|
||||
Plaintext(input),
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
seeder,
|
||||
);
|
||||
*seeded_lwe_out = seeded_lwe_ciphertext.into_scalar();
|
||||
@@ -145,7 +145,7 @@ pub unsafe extern "C" fn concrete_cpu_encrypt_ggsw_ciphertext_u64(
|
||||
&glwe_sk,
|
||||
&mut ggsw_out,
|
||||
Plaintext(input),
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -71,14 +71,14 @@ pub unsafe extern "C" fn concrete_cpu_init_lwe_circuit_bootstrap_private_functio
|
||||
&mut fpksk_list,
|
||||
&input_key,
|
||||
&output_key,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
|
||||
),
|
||||
Parallelism::Rayon => par_generate_circuit_bootstrap_lwe_pfpksk_list(
|
||||
&mut fpksk_list,
|
||||
&input_key,
|
||||
&output_key,
|
||||
Variance::from_variance(variance),
|
||||
Gaussian::from_dispersion_parameter(Variance::from_variance(variance), 0.0),
|
||||
&mut *(csprng as *mut EncryptionRandomGenerator<SoftwareRandomGenerator>),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#![allow(clippy::missing_safety_doc, dead_code)]
|
||||
#![cfg_attr(feature = "nightly", feature(stdsimd))]
|
||||
#![cfg_attr(feature = "nightly", feature(avx512_target_feature))]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
#include <err.h>
|
||||
#include "bootstrap_amortized.cuh"
|
||||
|
||||
/*
|
||||
@@ -85,6 +86,8 @@ void scratch_cuda_bootstrap_amortized_32(void *v_stream, uint32_t gpu_index,
|
||||
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -141,6 +144,8 @@ void scratch_cuda_bootstrap_amortized_64(void *v_stream, uint32_t gpu_index,
|
||||
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -216,6 +221,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -353,6 +360,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
#include <err.h>
|
||||
#include "bootstrap_fast_low_latency.cuh"
|
||||
#include "bootstrap_low_latency.cuh"
|
||||
/*
|
||||
@@ -100,6 +101,8 @@ uint64_t get_buffer_size_bootstrap_low_latency_64(
|
||||
input_lwe_ciphertext_count, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
@@ -244,6 +247,8 @@ void scratch_cuda_bootstrap_low_latency_32(
|
||||
allocate_gpu_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -377,6 +382,8 @@ void scratch_cuda_bootstrap_low_latency_64(
|
||||
allocate_gpu_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -527,6 +534,8 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
num_samples, num_lut_vectors, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -744,7 +753,10 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lwe_array_in, (double2 *)bootstrapping_key, pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, base_log, level_count,
|
||||
num_samples, num_lut_vectors, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
#include <err.h>
|
||||
#include "bootstrap_fast_multibit.cuh"
|
||||
#include "bootstrap_multibit.cuh"
|
||||
#include "bootstrap_multibit.h"
|
||||
@@ -172,6 +173,8 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
}
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -311,6 +314,8 @@ void scratch_cuda_multi_bit_pbs_64(
|
||||
}
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "polynomial/polynomial.cuh"
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <err.h>
|
||||
|
||||
__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
|
||||
int glwe_dimension,
|
||||
@@ -235,6 +236,8 @@ void cuda_convert_lwe_bootstrap_key(double2 *dest, ST *src, void *v_stream,
|
||||
}
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -446,6 +449,8 @@ void cuda_fourier_polynomial_mul(void *_input1, void *_input2, void *_output,
|
||||
}
|
||||
break;
|
||||
default:
|
||||
errx(EXIT_FAILURE, "polynomial size %u is not supported. Supported values "
|
||||
"are: 256, 512, 1024, 2048, 4096, 8192, 16384.", polynomial_size);
|
||||
break;
|
||||
}
|
||||
cuda_drop_async(buffer, stream, gpu_index);
|
||||
|
||||
@@ -31,20 +31,6 @@ pip install lazydocs
|
||||
.venvtrash/bin/lazydocs --output-path="../docs/dev/api" --overview-file="README.md" --src-base-url="../../" --no-watermark concrete
|
||||
cd -
|
||||
|
||||
# Add the files in the summary
|
||||
FILES=$(cd docs && find dev/api -name "*.md")
|
||||
|
||||
TMP_FILE=$(mktemp /tmp/apidocs.XXXXXX)
|
||||
rm -rf "$TMP_FILE"
|
||||
touch "$TMP_FILE"
|
||||
|
||||
for f in $FILES
|
||||
do
|
||||
filename=$(echo "$f" | rev | cut -d '/' -f 1 | rev)
|
||||
|
||||
echo " * [$filename]($f)" >> "$TMP_FILE"
|
||||
done
|
||||
|
||||
rm -rf "$FRESH_DIRECTORY"
|
||||
|
||||
# New files?
|
||||
|
||||
@@ -206,6 +206,9 @@ then
|
||||
git diff
|
||||
fi
|
||||
|
||||
# Update the pandas files in CML
|
||||
make update_encrypted_dataframe
|
||||
|
||||
# Launch CML tests with pytest (and ignore flaky ones)
|
||||
# As compared to regular `make pytest`, known flaky errors from Concrete ML are simply ignored
|
||||
# and coverage is disabled
|
||||
|
||||
21
ci/slab.toml
@@ -1,32 +1,42 @@
|
||||
# This is the new version of Slab that handles multi backend providers.
|
||||
[backend.aws.m7i-cpu-bench]
|
||||
region = "eu-west-1"
|
||||
image_id = "ami-002bdcd64b8472cf9" # Based on Ubuntu 22.4
|
||||
instance_type = "m7i.48xlarge"
|
||||
security_group = ["sg-0e55cc31dfda0d8a7", ]
|
||||
|
||||
[profile.m7i-cpu-bench]
|
||||
region = "eu-west-1"
|
||||
image_id = "ami-002bdcd64b8472cf9" # Based on Ubuntu 22.4
|
||||
instance_type = "m7i.48xlarge"
|
||||
security_group= ["sg-0e55cc31dfda0d8a7", ]
|
||||
|
||||
[profile.m7i-cpu-test]
|
||||
region = "eu-west-1"
|
||||
image_id = "ami-002bdcd64b8472cf9"
|
||||
instance_type = "m7i.16xlarge"
|
||||
security_group= ["sg-0e55cc31dfda0d8a7", ]
|
||||
|
||||
[profile.m7i-metal]
|
||||
region = "eu-west-1"
|
||||
image_id = "ami-002bdcd64b8472cf9"
|
||||
instance_type = "m7i.metal-24xl"
|
||||
security_group= ["sg-0e55cc31dfda0d8a7", ]
|
||||
|
||||
[profile.gpu-bench]
|
||||
region = "us-east-1"
|
||||
image_id = "ami-08e27480d79e82238"
|
||||
instance_type = "p3.2xlarge"
|
||||
subnet_id = "subnet-8123c9e7"
|
||||
security_group= ["sg-0f8b52622a2669491", ]
|
||||
security_group= ["sg-017afab1f328af917", ]
|
||||
|
||||
# Docker is well configured for test inside docker in this AMI
|
||||
[profile.gpu-test]
|
||||
region = "us-east-1"
|
||||
image_id = "ami-0c4773f5626d919b6"
|
||||
image_id = "ami-0257c6ad39f902b5e"
|
||||
instance_type = "p3.2xlarge"
|
||||
subnet_id = "subnet-8123c9e7"
|
||||
security_group= ["sg-0f8b52622a2669491", ]
|
||||
security_group= ["sg-017afab1f328af917", ]
|
||||
|
||||
# It has CUDA Driver (<=12.5) and Docker installed
|
||||
[profile.gpu-test-ubuntu22]
|
||||
@@ -34,13 +44,14 @@ region = "us-east-1"
|
||||
image_id = "ami-05385e0c3c574621f"
|
||||
instance_type = "p3.2xlarge"
|
||||
subnet_id = "subnet-8123c9e7"
|
||||
security_group= ["sg-0f8b52622a2669491", ]
|
||||
security_group= ["sg-017afab1f328af917", ]
|
||||
|
||||
[profile.slurm-cluster]
|
||||
region = "eu-west-3"
|
||||
image_id = "ami-0bb5bb9cb747b5ddd"
|
||||
instance_id = "i-0e5ae2a14134d6275"
|
||||
instance_type = "m6i.8xlarge"
|
||||
security_group= ["sg-02dd8470fa845f31b", ]
|
||||
|
||||
#################################################
|
||||
# Compiler commands
|
||||
@@ -121,5 +132,5 @@ check_run_name = "Concrete Python Release (GPU)"
|
||||
|
||||
[command.concrete-python-test-gpu-wheel]
|
||||
workflow = "concrete_python_test_gpu_wheel.yml"
|
||||
profile = "gpu-test-ubuntu22"
|
||||
profile = "gpu-test"
|
||||
check_run_name = "Concrete Python Test GPU Wheel"
|
||||
|
||||
@@ -136,7 +136,7 @@ ExternalProject_Add(
|
||||
DOWNLOAD_COMMAND ""
|
||||
CONFIGURE_COMMAND "" OUTPUT "${CONCRETE_CPU_STATIC_LIB}"
|
||||
BUILD_ALWAYS true
|
||||
BUILD_COMMAND cargo +nightly-2024-01-31 build --release --features=nightly
|
||||
BUILD_COMMAND cargo +nightly-2024-07-01 build --release --features=nightly
|
||||
BINARY_DIR "${CONCRETE_CPU_DIR}"
|
||||
INSTALL_COMMAND ""
|
||||
LOG_BUILD ON
|
||||
|
||||
@@ -329,19 +329,9 @@ run-end-to-end-tests-gpu: build-end-to-end-test generate-gpu-tests
|
||||
--backend=gpu \
|
||||
$(FIXTURE_GPU_DIR)/*.yaml
|
||||
|
||||
## end-to-end-dataflow-tests
|
||||
|
||||
build-end-to-end-dataflow-tests: build-initialized
|
||||
cmake --build $(BUILD_DIR) --target end_to_end_jit_auto_parallelization
|
||||
cmake --build $(BUILD_DIR) --target end_to_end_jit_distributed
|
||||
cmake --build $(BUILD_DIR) --target end_to_end_jit_aes_short
|
||||
|
||||
run-end-to-end-dataflow-tests: build-end-to-end-dataflow-tests
|
||||
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_auto_parallelization
|
||||
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_distributed
|
||||
## end-to-end-distributed-tests
|
||||
|
||||
run-end-to-end-distributed-tests: $(GTEST_PARALLEL_PY) build-end-to-end-tests generate-cpu-tests
|
||||
srun -n4 -c8 --kill-on-bad-exit=1 $(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_distributed
|
||||
srun -n4 -c8 --kill-on-bad-exit=1 $(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_test \
|
||||
--optimizer-strategy=dag-mono --dataflow-parallelize=1 \
|
||||
$(FIXTURE_CPU_DIR)/*round*.yaml $(FIXTURE_CPU_DIR)/*relu*.yaml $(FIXTURE_CPU_DIR)/*linalg*.yaml
|
||||
@@ -562,8 +552,6 @@ FORCE:
|
||||
run-unit-tests \
|
||||
run-python-tests \
|
||||
build-end-to-end-tests \
|
||||
build-end-to-end-dataflow-tests \
|
||||
run-end-to-end-dataflow-tests \
|
||||
run-random-end-to-end-tests-for-each-options \
|
||||
opt \
|
||||
mlir-opt \
|
||||
|
||||
@@ -148,6 +148,9 @@ You can create a tarball containing libs, bins, and include files for the tools
|
||||
|
||||
### Build the Python Package
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The wheel built in the following steps is for `concrete-compiler` (which doesn't have the frontend layer) and not `concrete-python`. If you are interested in the `concrete-python` package, then you should build it from [here](https://github.com/zama-ai/concrete/tree/main/frontends/concrete-python) instead.
|
||||
|
||||
Currently supported platforms:
|
||||
- Linux x86_64 for python 3.8, 3.9, 3.10, and 3.11
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ include "concretelang/Dialect/FHE/Interfaces/FHEInterfaces.td"
|
||||
class FHE_Op<string mnemonic, list<Trait> traits = []> :
|
||||
Op<FHE_Dialect, mnemonic, traits>;
|
||||
|
||||
def FHE_ZeroEintOp : FHE_Op<"zero", [Pure, ConstantNoise]> {
|
||||
def FHE_ZeroEintOp : FHE_Op<"zero", [Pure, ZeroNoise]> {
|
||||
let summary = "Returns a trivial encrypted integer of 0";
|
||||
|
||||
let description = [{
|
||||
@@ -34,7 +34,7 @@ def FHE_ZeroEintOp : FHE_Op<"zero", [Pure, ConstantNoise]> {
|
||||
let results = (outs FHE_AnyEncryptedInteger:$out);
|
||||
}
|
||||
|
||||
def FHE_ZeroTensorOp : FHE_Op<"zero_tensor", [Pure, ConstantNoise]> {
|
||||
def FHE_ZeroTensorOp : FHE_Op<"zero_tensor", [Pure, ZeroNoise]> {
|
||||
let summary = "Creates a new tensor with all elements initialized to an encrypted zero.";
|
||||
|
||||
let description = [{
|
||||
@@ -52,7 +52,7 @@ def FHE_ZeroTensorOp : FHE_Op<"zero_tensor", [Pure, ConstantNoise]> {
|
||||
let results = (outs Type<And<[TensorOf<[FHE_AnyEncryptedInteger]>.predicate, HasStaticShapePred]>>:$tensor);
|
||||
}
|
||||
|
||||
def FHE_AddEintIntOp : FHE_Op<"add_eint_int", [Pure, BinaryEintInt, DeclareOpInterfaceMethods<Binary>]> {
|
||||
def FHE_AddEintIntOp : FHE_Op<"add_eint_int", [Pure, BinaryEintInt, AdditiveNoise, DeclareOpInterfaceMethods<Binary>]> {
|
||||
let summary = "Adds an encrypted integer and a clear integer";
|
||||
|
||||
let description = [{
|
||||
@@ -85,7 +85,7 @@ def FHE_AddEintIntOp : FHE_Op<"add_eint_int", [Pure, BinaryEintInt, DeclareOpInt
|
||||
let hasFolder = 1;
|
||||
}
|
||||
|
||||
def FHE_AddEintOp : FHE_Op<"add_eint", [Pure, BinaryEint, DeclareOpInterfaceMethods<BinaryEint>]> {
|
||||
def FHE_AddEintOp : FHE_Op<"add_eint", [Pure, BinaryEint, AdditiveNoise, DeclareOpInterfaceMethods<BinaryEint>]> {
|
||||
let summary = "Adds two encrypted integers";
|
||||
|
||||
let description = [{
|
||||
@@ -117,7 +117,7 @@ def FHE_AddEintOp : FHE_Op<"add_eint", [Pure, BinaryEint, DeclareOpInterfaceMeth
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def FHE_SubIntEintOp : FHE_Op<"sub_int_eint", [Pure, BinaryIntEint]> {
|
||||
def FHE_SubIntEintOp : FHE_Op<"sub_int_eint", [Pure, BinaryIntEint, AdditiveNoise]> {
|
||||
let summary = "Subtract an encrypted integer from a clear integer";
|
||||
|
||||
let description = [{
|
||||
@@ -149,7 +149,7 @@ def FHE_SubIntEintOp : FHE_Op<"sub_int_eint", [Pure, BinaryIntEint]> {
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def FHE_SubEintIntOp : FHE_Op<"sub_eint_int", [Pure, BinaryEintInt, DeclareOpInterfaceMethods<Binary>]> {
|
||||
def FHE_SubEintIntOp : FHE_Op<"sub_eint_int", [Pure, BinaryEintInt, AdditiveNoise, DeclareOpInterfaceMethods<Binary>]> {
|
||||
let summary = "Subtract a clear integer from an encrypted integer";
|
||||
|
||||
let description = [{
|
||||
@@ -182,7 +182,7 @@ def FHE_SubEintIntOp : FHE_Op<"sub_eint_int", [Pure, BinaryEintInt, DeclareOpInt
|
||||
let hasFolder = 1;
|
||||
}
|
||||
|
||||
def FHE_SubEintOp : FHE_Op<"sub_eint", [Pure, BinaryEint, DeclareOpInterfaceMethods<BinaryEint>]> {
|
||||
def FHE_SubEintOp : FHE_Op<"sub_eint", [Pure, BinaryEint, AdditiveNoise, DeclareOpInterfaceMethods<BinaryEint>]> {
|
||||
let summary = "Subtract an encrypted integer from an encrypted integer";
|
||||
|
||||
let description = [{
|
||||
@@ -214,7 +214,7 @@ def FHE_SubEintOp : FHE_Op<"sub_eint", [Pure, BinaryEint, DeclareOpInterfaceMeth
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def FHE_NegEintOp : FHE_Op<"neg_eint", [Pure, UnaryEint, DeclareOpInterfaceMethods<UnaryEint>]> {
|
||||
def FHE_NegEintOp : FHE_Op<"neg_eint", [Pure, UnaryEint, AdditiveNoise, DeclareOpInterfaceMethods<UnaryEint>]> {
|
||||
|
||||
let summary = "Negates an encrypted integer";
|
||||
|
||||
|
||||
@@ -37,6 +37,22 @@ def ConstantNoise : OpInterface<"ConstantNoise"> {
  let cppNamespace = "mlir::concretelang::FHE";
}

def ZeroNoise : OpInterface<"ZeroNoise"> {
  let description = [{
    An operation outputs a ciphertext with zero noise.
  }];

  let cppNamespace = "mlir::concretelang::FHE";
}

def AdditiveNoise : OpInterface<"AdditiveNoise"> {
  let description = [{
    An n-ary operation whose output noise is the unweighted sum of all input noises.
  }];

  let cppNamespace = "mlir::concretelang::FHE";
}
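
The two interfaces above only tag operations; the noise bookkeeping itself lives in the optimizer. As a rough illustration of what the tags mean (an assumption about the noise model, not the optimizer's code): a ZeroNoise result carries no noise at all, and an AdditiveNoise result carries the unweighted sum of its encrypted inputs' noise, e.g. in terms of variances:

    # Illustrative Python sketch only; names are hypothetical, not compiler APIs.
    def zero_noise_variance() -> float:
        # A ZeroNoise op such as FHE.zero outputs a ciphertext with no noise.
        return 0.0

    def additive_noise_variance(input_variances: list[float]) -> float:
        # An AdditiveNoise op's output noise is the unweighted sum of the
        # noise of its encrypted inputs.
        return sum(input_variances)

    assert additive_noise_variance([2e-16, 3e-16]) == 5e-16
    assert additive_noise_variance([zero_noise_variance(), 4e-16]) == 4e-16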
|
||||
|
||||
def UnaryEint : OpInterface<"UnaryEint"> {
|
||||
let description = [{
|
||||
A unary operation on scalars, with the operand encrypted.
|
||||
@@ -63,7 +79,7 @@ def UnaryEint : OpInterface<"UnaryEint"> {
|
||||
if (auto operandTy = dyn_cast<mlir::RankedTensorType>($_op->getOpOperand(0).get().getType())) {
|
||||
return operandTy.getElementType();
|
||||
} else return $_op->getOpOperand(0).get().getType();
|
||||
}]>
|
||||
}]>
|
||||
];
|
||||
}
|
||||
|
||||
@@ -124,8 +140,8 @@ def Binary : OpInterface<"Binary"> {
|
||||
if (auto cstOp = llvm::dyn_cast_or_null<mlir::arith::ConstantOp>($_op->
|
||||
getOpOperand(opNum).get().getDefiningOp()))
|
||||
return cstOp->template getAttrOfType<mlir::DenseIntElementsAttr>("value").template getValues<llvm::APInt>();
|
||||
else return {};
|
||||
}]>,
|
||||
else return {};
|
||||
}]>,
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
add_subdirectory(Interfaces)
|
||||
add_subdirectory(IR)
|
||||
add_subdirectory(Transforms)
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
set(LLVM_TARGET_DEFINITIONS Passes.td)
|
||||
mlir_tablegen(Passes.h.inc -gen-pass-decls -name Transforms)
|
||||
add_public_tablegen_target(ConcretelangSDFGTransformsPassIncGen)
|
||||
@@ -0,0 +1,26 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#ifndef CONCRETELANG_SDFG_TRANSFORMS_PASS_H
|
||||
#define CONCRETELANG_SDFG_TRANSFORMS_PASS_H
|
||||
|
||||
#include <mlir/Dialect/Func/IR/FuncOps.h>
|
||||
#include <mlir/Dialect/Linalg/IR/Linalg.h>
|
||||
#include <mlir/Dialect/MemRef/IR/MemRef.h>
|
||||
#include <mlir/Dialect/SCF/IR/SCF.h>
|
||||
#include <mlir/Pass/Pass.h>
|
||||
|
||||
#define GEN_PASS_CLASSES
|
||||
#include <concretelang/Dialect/SDFG/Transforms/Passes.h.inc>
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
|
||||
std::unique_ptr<mlir::Pass> createSDFGBufferOwnershipPass();
|
||||
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,17 @@
#ifndef CONCRETELANG_SDFG_PASSES
#define CONCRETELANG_SDFG_PASSES

include "mlir/Pass/PassBase.td"

def SDFGBufferOwnership : Pass<"SDFGBufferOwnership", "mlir::ModuleOp"> {
  let summary =
    "Take ownership of data passed to SDFG operators.";

  let description = [{ As data used in SDFG operators, in particular
  PUT operations, can be used asynchronously, deallocation must be
  handled by the runtime if we take ownership. This pass removes
  explicit deallocation calls where no other uses of the data exist
  and makes copies otherwise, letting the runtime handle
  deallocation when appropriate.}]; }

#endif
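
A minimal sketch of the ownership rule the pass description spells out, written as plain Python for clarity; the function and its names are hypothetical and only mirror the prose above, not the actual MLIR rewrite:

    def handle_sdfg_put(other_users: list, dealloc_op):
        """Decide how a buffer handed to an asynchronous SDFG PUT is managed.

        other_users: uses of the buffer besides the PUT itself.
        """
        if not other_users:
            # Sole user: drop the explicit deallocation and let the runtime
            # free the buffer once the asynchronous work has completed.
            return ("erase", dealloc_op)
        # The buffer is still needed elsewhere: hand the runtime its own copy
        # and keep the original buffer (and its deallocation) for those users.
        return ("copy-for-runtime", None)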
|
||||
@@ -25,6 +25,8 @@ bool _dfr_is_jit();
|
||||
bool _dfr_is_root_node();
|
||||
bool _dfr_use_omp();
|
||||
bool _dfr_is_distributed();
|
||||
void _dfr_run_remote_scheduler();
|
||||
void _dfr_register_lib(void *dlh);
|
||||
|
||||
typedef enum _dfr_task_arg_type {
|
||||
_DFR_TASK_ARG_BASE = 0,
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#ifndef CONCRETELANG_GPUDFG_HPP
|
||||
#define CONCRETELANG_GPUDFG_HPP
|
||||
|
||||
#ifdef CONCRETELANG_CUDA_SUPPORT
|
||||
#include "bootstrap.h"
|
||||
#include "device.h"
|
||||
#include "keyswitch.h"
|
||||
#include "linear_algebra.h"
|
||||
|
||||
#endif
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
namespace gpu_dfg {
|
||||
|
||||
bool check_cuda_device_available();
|
||||
bool check_cuda_runtime_enabled();
|
||||
|
||||
} // namespace gpu_dfg
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
#endif
|
||||
@@ -109,10 +109,8 @@ struct RuntimeContextManager {
|
||||
bool allocated = false;
|
||||
bool lazy_key_transfer = false;
|
||||
|
||||
RuntimeContextManager(bool lazy = false) : lazy_key_transfer(lazy) {
|
||||
context = nullptr;
|
||||
_dfr_node_level_runtime_context_manager = this;
|
||||
}
|
||||
RuntimeContextManager(bool lazy = false)
|
||||
: context(nullptr), lazy_key_transfer(lazy) {}
|
||||
|
||||
void setContext(void *ctx) {
|
||||
assert(context == nullptr &&
|
||||
|
||||
@@ -83,7 +83,8 @@ uint64_t stream_emulator_get_uint64(void *stream);
|
||||
void *stream_emulator_make_memref_stream(const char *name, stream_type stype);
|
||||
void stream_emulator_put_memref(void *stream, uint64_t *allocated,
|
||||
uint64_t *aligned, uint64_t offset,
|
||||
uint64_t size, uint64_t stride);
|
||||
uint64_t size, uint64_t stride,
|
||||
uint64_t data_ownership);
|
||||
void stream_emulator_get_memref(void *stream, uint64_t *out_allocated,
|
||||
uint64_t *out_aligned, uint64_t out_offset,
|
||||
uint64_t out_size, uint64_t out_stride);
|
||||
@@ -93,7 +94,8 @@ void *stream_emulator_make_memref_batch_stream(const char *name,
|
||||
void stream_emulator_put_memref_batch(void *stream, uint64_t *allocated,
|
||||
uint64_t *aligned, uint64_t offset,
|
||||
uint64_t size0, uint64_t size1,
|
||||
uint64_t stride0, uint64_t stride1);
|
||||
uint64_t stride0, uint64_t stride1,
|
||||
uint64_t data_ownership);
|
||||
void stream_emulator_get_memref_batch(void *stream, uint64_t *out_allocated,
|
||||
uint64_t *out_aligned,
|
||||
uint64_t out_offset, uint64_t out_size0,
|
||||
|
||||
@@ -10,40 +10,156 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "concretelang/Runtime/DFRuntime.hpp"
|
||||
|
||||
#define TIME_UTIL_CLOCK CLOCK_MONOTONIC
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
namespace time_util {
|
||||
|
||||
extern bool timing_enabled;
|
||||
extern struct timespec timestamp;
|
||||
|
||||
} // namespace time_util
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
static inline int timespec_diff(struct timespec *, const struct timespec *,
|
||||
const struct timespec *);
|
||||
|
||||
#define CONCRETELANG_ENABLE_TIMING() \
|
||||
do { \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, \
|
||||
&mlir::concretelang::time_util::timestamp) == 0); \
|
||||
char *env = getenv("CONCRETE_TIMING_ENABLED"); \
|
||||
if (env != nullptr) \
|
||||
if (!strncmp(env, "True", 4) || !strncmp(env, "true", 4) || \
|
||||
!strncmp(env, "ON", 2) || !strncmp(env, "on", 2) || \
|
||||
!strncmp(env, "1", 1)) \
|
||||
mlir::concretelang::time_util::timing_enabled = true; \
|
||||
} while (0)
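
The macro reads CONCRETE_TIMING_ENABLED once, so the variable has to be set before the runtime initializes. A small usage sketch from a driver process (the binary name is a placeholder):

    import os
    import subprocess

    # Accepted truthy values, per the strncmp checks above: True/true, ON/on, 1.
    env = dict(os.environ, CONCRETE_TIMING_ENABLED="1")
    subprocess.run(["./my_concrete_program"], env=env, check=True)  # placeholder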
|
||||
|
||||
#define BEGIN_TIME(p) \
|
||||
do { \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, (p)) == 0); \
|
||||
if (mlir::concretelang::time_util::timing_enabled) { \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, (p)) == 0); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#if CONCRETELANG_DATAFLOW_EXECUTION_ENABLED
|
||||
#define END_TIME(p, m) \
|
||||
do { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
|
||||
<< " time : \t" << (p)->tv_sec << "." << (p)->tv_nsec \
|
||||
<< " seconds.\n" \
|
||||
<< std::flush; \
|
||||
if (mlir::concretelang::time_util::timing_enabled) { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
struct timespec _timestamp_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
|
||||
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
|
||||
&mlir::concretelang::time_util::timestamp) == 0); \
|
||||
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
|
||||
<< "] -\t"; \
|
||||
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
|
||||
<< " time : \t" << time_in_seconds((p)) << " seconds.\n" \
|
||||
<< std::flush; \
|
||||
} \
|
||||
} while (0)
|
||||
#define END_TIME_C(p, m, c) \
|
||||
do { \
|
||||
if (mlir::concretelang::time_util::timing_enabled) { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
struct timespec _timestamp_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
|
||||
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
|
||||
&mlir::concretelang::time_util::timestamp) == 0); \
|
||||
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
|
||||
<< "] -\t"; \
|
||||
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
|
||||
<< " [" << (c) << "] time : \t" << time_in_seconds((p)) \
|
||||
<< " seconds.\n" \
|
||||
<< std::flush; \
|
||||
} \
|
||||
} while (0)
|
||||
#define END_TIME_C_ACC(p, m, c, acc) \
|
||||
do { \
|
||||
if (mlir::concretelang::time_util::timing_enabled) { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
timespec_acc((acc), (p), (acc)); \
|
||||
struct timespec _timestamp_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
|
||||
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
|
||||
&mlir::concretelang::time_util::timestamp) == 0); \
|
||||
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
|
||||
<< "] -\t"; \
|
||||
std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t" << (m) \
|
||||
<< " [" << (c) << "] time : \t" << time_in_seconds((p)) \
|
||||
<< " (total : " << time_in_seconds((acc)) << " )" \
|
||||
<< " seconds.\n" \
|
||||
<< std::flush; \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define END_TIME(p, m) \
|
||||
do { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
std::cout << (m) << " time : \t" << (p)->tv_sec << "." << (p)->tv_nsec \
|
||||
<< " seconds.\n" \
|
||||
<< std::flush; \
|
||||
if (mlir::concretelang::time_util::timing_enabled) { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
struct timespec _timestamp_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
|
||||
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
|
||||
&mlir::concretelang::time_util::timestamp) == 0); \
|
||||
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
|
||||
<< "] -\t"; \
|
||||
std::cout << (m) << " time : \t" << time_in_seconds((p)) \
|
||||
<< " seconds.\n" \
|
||||
<< std::flush; \
|
||||
} \
|
||||
} while (0)
|
||||
#define END_TIME_C(p, m, c) \
|
||||
do { \
|
||||
if (mlir::concretelang::time_util::timing_enabled) { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
struct timespec _timestamp_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
|
||||
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
|
||||
&mlir::concretelang::time_util::timestamp) == 0); \
|
||||
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
|
||||
<< "] -\t"; \
|
||||
std::cout << (m) << " [" << (c) << "] time : \t" << time_in_seconds((p)) \
|
||||
<< " seconds.\n" \
|
||||
<< std::flush; \
|
||||
} \
|
||||
} while (0)
|
||||
#define END_TIME_C_ACC(p, m, c, acc) \
|
||||
do { \
|
||||
if (mlir::concretelang::time_util::timing_enabled) { \
|
||||
struct timespec _end_time_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv) == 0); \
|
||||
assert(timespec_diff((p), &_end_time_tv, (p)) == 0); \
|
||||
timespec_acc((acc), (p), (acc)); \
|
||||
struct timespec _timestamp_tv; \
|
||||
assert(clock_gettime(TIME_UTIL_CLOCK, &_timestamp_tv) == 0); \
|
||||
assert(timespec_diff(&_timestamp_tv, &_timestamp_tv, \
|
||||
&mlir::concretelang::time_util::timestamp) == 0); \
|
||||
std::cout << "[Timing logs][" << time_in_seconds(&_timestamp_tv) \
|
||||
<< "] -\t"; \
|
||||
std::cout << (m) << " [" << (c) << "] time : \t" << time_in_seconds((p)) \
|
||||
<< " (total : " << time_in_seconds((acc)) << " )" \
|
||||
<< " seconds.\n" \
|
||||
<< std::flush; \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
@@ -57,6 +173,13 @@ static inline double get_thread_cpu_time(void) {
|
||||
return _t;
|
||||
}
|
||||
|
||||
static inline double time_in_seconds(struct timespec *_tv) {
|
||||
double _t;
|
||||
_t = _tv->tv_sec;
|
||||
_t += _tv->tv_nsec * 1e-9;
|
||||
return _t;
|
||||
}
|
||||
|
||||
static inline int timespec_diff(struct timespec *_result,
|
||||
const struct timespec *_px,
|
||||
const struct timespec *_py) {
|
||||
@@ -86,10 +209,23 @@ static inline int timespec_diff(struct timespec *_result,
|
||||
return _x.tv_sec < _y.tv_sec;
|
||||
}
|
||||
|
||||
static inline void timespec_acc(struct timespec *_result,
|
||||
const struct timespec *_px,
|
||||
const struct timespec *_py) {
|
||||
struct timespec _x, _y;
|
||||
_x = *_px;
|
||||
_y = *_py;
|
||||
_result->tv_sec = _x.tv_sec + _y.tv_sec;
|
||||
_result->tv_nsec = _x.tv_nsec + _y.tv_nsec;
|
||||
}
|
||||
|
||||
#else // CONCRETELANG_TIMING_ENABLED
|
||||
|
||||
#define CONCRETELANG_ENABLE_TIMING()
|
||||
#define BEGIN_TIME(p)
|
||||
#define END_TIME(p, m)
|
||||
#define END_TIME_C(p, m, c)
|
||||
#define END_TIME_C_ACC(p, m, c, acc)
|
||||
|
||||
#endif // CONCRETELANG_TIMING_ENABLED
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#ifndef CONCRETELANG_RUNTIME_UTILS_H
|
||||
#define CONCRETELANG_RUNTIME_UTILS_H
|
||||
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
|
||||
// Mainly a wrapper to some LLVM functions. The reason to have this wrapper is
|
||||
// to avoid linking conflicts between the python binary extension, and LLVM.
|
||||
void LLVMInitializeNativeTarget();
|
||||
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
#endif
|
||||
@@ -21,13 +21,11 @@ namespace concretelang {
|
||||
namespace dfr {
|
||||
|
||||
struct WorkFunctionRegistry;
|
||||
namespace {
|
||||
static void *dl_handle;
|
||||
static WorkFunctionRegistry *_dfr_node_level_work_function_registry;
|
||||
} // namespace
|
||||
extern WorkFunctionRegistry *_dfr_node_level_work_function_registry;
|
||||
extern void *dl_handle;
|
||||
|
||||
struct WorkFunctionRegistry {
|
||||
WorkFunctionRegistry() { _dfr_node_level_work_function_registry = this; }
|
||||
WorkFunctionRegistry() = default;
|
||||
|
||||
wfnptr getWorkFunctionPointer(const std::string &name) {
|
||||
std::lock_guard<std::mutex> guard(registry_guard);
|
||||
|
||||
@@ -26,8 +26,6 @@ using concretelang::protocol::Message;
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
|
||||
bool getEmitGPUOption();
|
||||
|
||||
/// Compilation context that acts as the root owner of LLVM and MLIR
|
||||
/// data structures directly and indirectly referenced by artefacts
|
||||
/// produced by the `CompilerEngine`.
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unistd.h>
|
||||
|
||||
using concretelang::clientlib::ClientCircuit;
|
||||
using concretelang::clientlib::ClientProgram;
|
||||
@@ -220,37 +221,32 @@ private:
|
||||
auto new_path = [=]() {
|
||||
llvm::SmallString<0> outputPath;
|
||||
llvm::sys::path::append(outputPath, rootFolder);
|
||||
std::string uid = std::to_string(
|
||||
std::hash<std::thread::id>()(std::this_thread::get_id()));
|
||||
auto pid = getpid();
|
||||
std::string uid = std::to_string(pid);
|
||||
uid.append("-");
|
||||
uid.append(std::to_string(std::rand()));
|
||||
llvm::sys::path::append(outputPath, uid);
|
||||
return std::string(outputPath);
|
||||
};
|
||||
|
||||
// Macos sometimes fail to create new directories. We have to retry a few
|
||||
// times.
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
auto pathString = new_path();
|
||||
auto ec = std::error_code();
|
||||
llvm::errs() << "TestProgram: create temporary directory(" << pathString
|
||||
<< ")\n";
|
||||
if (!std::filesystem::create_directory(pathString, ec)) {
|
||||
llvm::errs() << "TestProgram: fail to create temporary directory("
|
||||
<< pathString << "), ";
|
||||
if (ec) {
|
||||
llvm::errs() << "already exists";
|
||||
} else {
|
||||
llvm::errs() << "error(" << ec.message() << ")";
|
||||
}
|
||||
auto pathString = new_path();
|
||||
auto ec = std::error_code();
|
||||
llvm::errs() << "TestProgram: create temporary directory(" << pathString
|
||||
<< ")\n";
|
||||
if (!std::filesystem::create_directory(pathString, ec)) {
|
||||
llvm::errs() << "TestProgram: fail to create temporary directory("
|
||||
<< pathString << "), ";
|
||||
if (ec) {
|
||||
llvm::errs() << "already exists";
|
||||
} else {
|
||||
llvm::errs() << "TestProgram: directory(" << pathString
|
||||
<< ") successfully created\n";
|
||||
return pathString;
|
||||
llvm::errs() << "error(" << ec.message() << ")";
|
||||
}
|
||||
assert(false);
|
||||
} else {
|
||||
llvm::errs() << "TestProgram: directory(" << pathString
|
||||
<< ") successfully created\n";
|
||||
return pathString;
|
||||
}
|
||||
llvm::errs() << "Failed to create temp directory 5 times. Aborting...\n";
|
||||
assert(false);
|
||||
}
|
||||
};
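
The retry loop above boils down to a simple pattern: derive a directory name from the pid plus a random suffix and retry a handful of times, since creation occasionally fails spuriously on macOS. A hedged Python equivalent (the helper name is made up for illustration):

    import os
    import random

    def create_test_dir(root: str, attempts: int = 5) -> str:
        for _ in range(attempts):
            path = os.path.join(root, f"{os.getpid()}-{random.randrange(2**31)}")
            try:
                os.mkdir(path)
                return path
            except OSError:
                continue  # already exists or transient failure: retry with a new suffix
        raise RuntimeError(f"failed to create a temporary directory under {root}")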
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "concretelang/Common/Keysets.h"
|
||||
#include "concretelang/Dialect/FHE/IR/FHEOpsDialect.h.inc"
|
||||
#include "concretelang/Runtime/DFRuntime.hpp"
|
||||
#include "concretelang/Runtime/GPUDFG.hpp"
|
||||
#include "concretelang/ServerLib/ServerLib.h"
|
||||
#include "concretelang/Support/logging.h"
|
||||
#include <llvm/Support/Debug.h>
|
||||
@@ -462,6 +463,14 @@ void initDataflowParallelization() {
|
||||
mlir::concretelang::dfr::_dfr_set_required(true);
|
||||
}
|
||||
|
||||
bool checkGPURuntimeEnabled() {
|
||||
return mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled();
|
||||
}
|
||||
|
||||
bool checkCudaDeviceAvailable() {
|
||||
return mlir::concretelang::gpu_dfg::check_cuda_device_available();
|
||||
}
|
||||
|
||||
std::string roundTrip(const char *module) {
|
||||
std::shared_ptr<mlir::concretelang::CompilationContext> ccx =
|
||||
mlir::concretelang::CompilationContext::createShared();
|
||||
@@ -673,6 +682,8 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
|
||||
m.def("terminate_df_parallelization", &terminateDataflowParallelization);
|
||||
|
||||
m.def("init_df_parallelization", &initDataflowParallelization);
|
||||
m.def("check_gpu_runtime_enabled", &checkGPURuntimeEnabled);
|
||||
m.def("check_cuda_device_available", &checkCudaDeviceAvailable);
|
||||
|
||||
pybind11::enum_<mlir::concretelang::Backend>(m, "Backend")
|
||||
.value("CPU", mlir::concretelang::Backend::CPU)
|
||||
@@ -957,6 +968,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
|
||||
::concretelang::serverlib::ServerLambda lambda,
|
||||
::concretelang::clientlib::PublicArguments &publicArguments,
|
||||
::concretelang::clientlib::EvaluationKeys &evaluationKeys) {
|
||||
pybind11::gil_scoped_release release;
|
||||
SignalGuard signalGuard;
|
||||
return library_server_call(support, lambda, publicArguments,
|
||||
evaluationKeys);
|
||||
@@ -1238,6 +1250,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
|
||||
::concretelang::clientlib::PublicArguments &publicArguments,
|
||||
::concretelang::clientlib::EvaluationKeys &evaluationKeys) {
|
||||
SignalGuard signalGuard;
|
||||
pybind11::gil_scoped_release release;
|
||||
auto keyset = evaluationKeys.keyset;
|
||||
auto values = publicArguments.values;
|
||||
GET_OR_THROW_RESULT(auto output, circuit.call(keyset, values));
|
||||
@@ -1268,6 +1281,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
|
||||
[](::concretelang::clientlib::ValueExporter &exporter,
|
||||
size_t position, int64_t value) {
|
||||
SignalGuard signalGuard;
|
||||
pybind11::gil_scoped_release release;
|
||||
|
||||
auto info = exporter.circuit.getCircuitInfo()
|
||||
.asReader()
|
||||
@@ -1288,6 +1302,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
|
||||
size_t position, std::vector<int64_t> values,
|
||||
std::vector<int64_t> shape) {
|
||||
SignalGuard signalGuard;
|
||||
pybind11::gil_scoped_release release;
|
||||
std::vector<size_t> dimensions(shape.begin(), shape.end());
|
||||
auto info =
|
||||
exporter.circuit.getCircuitInfo().asReader().getInputs()[position];
|
||||
@@ -1363,6 +1378,7 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
|
||||
size_t position,
|
||||
::concretelang::clientlib::SharedScalarOrTensorData &value) {
|
||||
SignalGuard signalGuard;
|
||||
pybind11::gil_scoped_release release;
|
||||
|
||||
auto result =
|
||||
decrypter.circuit.processOutput(value.value, position);
|
||||
|
||||
@@ -8,6 +8,8 @@ import atexit
from mlir._mlir_libs._concretelang._compiler import (
    terminate_df_parallelization as _terminate_df_parallelization,
    init_df_parallelization as _init_df_parallelization,
    check_gpu_runtime_enabled as _check_gpu_runtime_enabled,
    check_cuda_device_available as _check_cuda_device_available,
)
from mlir._mlir_libs._concretelang._compiler import round_trip as _round_trip
from mlir._mlir_libs._concretelang._compiler import (
@@ -49,6 +51,18 @@ def init_dfr():
    _init_df_parallelization()


def check_gpu_enabled() -> bool:
    """Check whether the compiler and runtime support GPU offloading.

    GPU offloading is not always available, in particular in non-GPU wheels."""
    return _check_gpu_runtime_enabled()


def check_gpu_available() -> bool:
    """Check whether a CUDA device is available and online."""
    return _check_cuda_device_available()


# Cleanly terminate the dataflow runtime if it has been initialized
# (does nothing otherwise)
atexit.register(_terminate_df_parallelization)
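
A short usage sketch for the two helpers added above; the import path is a placeholder, since the module's package name is not visible in this hunk:

    from concrete.compiler import check_gpu_enabled, check_gpu_available  # placeholder path

    if check_gpu_enabled() and check_gpu_available():
        print("GPU runtime compiled in and a CUDA device is online")
    else:
        print("running on CPU")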
|
||||
|
||||
@@ -403,13 +403,19 @@ struct LowerSDFGPut
|
||||
"SDFG streams only support memrefs and integers.");
|
||||
funcName = stream_emulator_put_uint64;
|
||||
}
|
||||
// Add data ownership flag - if the put operation takes ownership
|
||||
// of the memref data, set to 0 by default.
|
||||
mlir::SmallVector<mlir::Value> operands(putOp->getOperands());
|
||||
operands.push_back(rewriter.create<mlir::arith::ConstantOp>(
|
||||
putOp.getLoc(), rewriter.getI64IntegerAttr(0)));
|
||||
|
||||
if (insertGenericForwardDeclaration(putOp, rewriter, funcName,
|
||||
putOp->getOperandTypes(),
|
||||
mlir::ValueRange{operands}.getTypes(),
|
||||
putOp->getResultTypes())
|
||||
.failed())
|
||||
return ::mlir::failure();
|
||||
mlir::SmallVector<mlir::Value> newOps;
|
||||
castDynamicTensorOps(putOp, rewriter, putOp->getOperands(), newOps);
|
||||
castDynamicTensorOps(putOp, rewriter, operands, newOps);
|
||||
rewriter.replaceOpWithNewOp<mlir::func::CallOp>(
|
||||
putOp, funcName, putOp->getResultTypes(), newOps);
|
||||
return ::mlir::success();
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#include "concretelang/Dialect/Tracing/IR/TracingOps.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
@@ -686,6 +687,37 @@ struct ZeroTensorOpPattern
|
||||
};
|
||||
};
|
||||
|
||||
struct TraceCiphertextOpPattern
|
||||
: public mlir::OpConversionPattern<Tracing::TraceCiphertextOp> {
|
||||
TraceCiphertextOpPattern(mlir::MLIRContext *context,
|
||||
mlir::TypeConverter &typeConverter)
|
||||
: mlir::OpConversionPattern<Tracing::TraceCiphertextOp>(
|
||||
typeConverter, context,
|
||||
mlir::concretelang::DEFAULT_PATTERN_BENEFIT) {}
|
||||
|
||||
::mlir::LogicalResult
|
||||
matchAndRewrite(Tracing::TraceCiphertextOp traceCiphertextOp,
|
||||
Tracing::TraceCiphertextOp::Adaptor adaptor,
|
||||
mlir::ConversionPatternRewriter &rewriter) const override {
|
||||
Tracing::TracePlaintextOp ptOp =
|
||||
rewriter.replaceOpWithNewOp<Tracing::TracePlaintextOp>(
|
||||
traceCiphertextOp, mlir::TypeRange{}, adaptor.getCiphertext());
|
||||
|
||||
if (auto msg = traceCiphertextOp.getMsg())
|
||||
ptOp.setMsg(msg);
|
||||
|
||||
if (auto nmsb = traceCiphertextOp.getNmsb())
|
||||
ptOp.setNmsb(nmsb);
|
||||
|
||||
auto inputWidth =
|
||||
ptOp.getPlaintext().getType().cast<mlir::IntegerType>().getWidth();
|
||||
|
||||
ptOp->setAttr("input_width", rewriter.getI64IntegerAttr(inputWidth));
|
||||
|
||||
return ::mlir::success();
|
||||
};
|
||||
};
|
||||
|
||||
struct SimulateTFHEPass : public SimulateTFHEBase<SimulateTFHEPass> {
|
||||
bool enableOverflowDetection;
|
||||
SimulateTFHEPass(bool enableOverflowDetection)
|
||||
@@ -704,7 +736,8 @@ void SimulateTFHEPass::runOnOperation() {
|
||||
target.addLegalOp<mlir::func::CallOp, mlir::memref::GetGlobalOp,
|
||||
mlir::memref::CastOp, mlir::bufferization::AllocTensorOp,
|
||||
mlir::tensor::CastOp, mlir::LLVM::GlobalOp,
|
||||
mlir::LLVM::AddressOfOp, mlir::LLVM::GEPOp>();
|
||||
mlir::LLVM::AddressOfOp, mlir::LLVM::GEPOp,
|
||||
Tracing::TracePlaintextOp>();
|
||||
// Make sure that no ops from `TFHE` remain after the lowering
|
||||
target.addIllegalDialect<TFHE::TFHEDialect>();
|
||||
|
||||
@@ -742,11 +775,11 @@ void SimulateTFHEPass::runOnOperation() {
|
||||
mlir::tensor::InsertOp, mlir::tensor::InsertSliceOp,
|
||||
mlir::tensor::ParallelInsertSliceOp, mlir::tensor::FromElementsOp,
|
||||
mlir::tensor::ExpandShapeOp, mlir::tensor::CollapseShapeOp,
|
||||
mlir::bufferization::AllocTensorOp, mlir::tensor::EmptyOp>(
|
||||
[&](mlir::Operation *op) {
|
||||
return converter.isLegal(op->getResultTypes()) &&
|
||||
converter.isLegal(op->getOperandTypes());
|
||||
});
|
||||
mlir::bufferization::AllocTensorOp, mlir::tensor::EmptyOp,
|
||||
Tracing::TraceCiphertextOp>([&](mlir::Operation *op) {
|
||||
return converter.isLegal(op->getResultTypes()) &&
|
||||
converter.isLegal(op->getOperandTypes());
|
||||
});
|
||||
// Make sure that no ops `linalg.generic` that have illegal types
|
||||
target
|
||||
.addDynamicallyLegalOp<mlir::linalg::GenericOp, mlir::tensor::GenerateOp>(
|
||||
@@ -778,8 +811,8 @@ void SimulateTFHEPass::runOnOperation() {
|
||||
|
||||
patterns.insert<ZeroOpPattern, ZeroTensorOpPattern, KeySwitchGLWEOpPattern,
|
||||
WopPBSGLWEOpPattern, EncodeLutForCrtWopPBSOpPattern,
|
||||
EncodePlaintextWithCrtOpPattern, NegOpPattern>(&getContext(),
|
||||
converter);
|
||||
EncodePlaintextWithCrtOpPattern, NegOpPattern,
|
||||
TraceCiphertextOpPattern>(&getContext(), converter);
|
||||
patterns.insert<SubIntGLWEOpPattern>(&getContext());
|
||||
|
||||
// if overflow detection is enable, then rewrite to CAPI functions that
|
||||
|
||||
@@ -6,11 +6,13 @@
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <initializer_list>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "boost/outcome.h"
|
||||
|
||||
#include "concretelang/Dialect/FHE/Interfaces/FHEInterfaces.h"
|
||||
#include "mlir/Dialect/Arith/IR/Arith.h"
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/Dialect/Linalg/IR/Linalg.h"
|
||||
@@ -34,6 +36,9 @@
|
||||
#define GEN_PASS_CLASSES
|
||||
#include "concretelang/Dialect/FHE/Analysis/ConcreteOptimizer.h.inc"
|
||||
|
||||
using concrete_optimizer::utils::location_from_string;
|
||||
using concrete_optimizer::utils::location_unknown;
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
namespace optimizer {
|
||||
@@ -110,7 +115,9 @@ struct FunctionToDag {
|
||||
}
|
||||
auto precision = fhe::utils::getEintPrecision(arg);
|
||||
auto shape = getShape(arg);
|
||||
auto opI = dagBuilder.add_input(precision, slice(shape));
|
||||
auto opI =
|
||||
dagBuilder.add_input(precision, slice(shape),
|
||||
*loc_to_location(arg.getParentRegion()->getLoc()));
|
||||
index[arg] = opI;
|
||||
return opI;
|
||||
}
|
||||
@@ -190,6 +197,11 @@ struct FunctionToDag {
|
||||
} else if (auto matmulEintEint = asMatmulEintEint(op)) {
|
||||
addEncMatMulTensor(matmulEintEint, encrypted_inputs, precision);
|
||||
return;
|
||||
} else if (auto zero = asZeroNoise(op)) {
|
||||
// special case as zero are rewritten in several optimizer nodes
|
||||
index = addZeroNoise(zero);
|
||||
} else if (auto additive = asAdditiveNoise(op)) {
|
||||
index = addAdditiveNoise(additive, encrypted_inputs);
|
||||
} else {
|
||||
index = addLevelledOp(op, encrypted_inputs);
|
||||
}
|
||||
@@ -201,6 +213,7 @@ struct FunctionToDag {
|
||||
void addLut(mlir::Operation &op, FHE::FheIntegerInterface inputType,
|
||||
Inputs &encrypted_inputs, int precision) {
|
||||
auto val = op.getResult(0);
|
||||
auto loc = loc_to_location(op.getLoc());
|
||||
assert(encrypted_inputs.size() == 1);
|
||||
// No need to distinguish different lut kind until we do approximate
|
||||
// paradigm on outputs
|
||||
@@ -209,13 +222,14 @@ struct FunctionToDag {
|
||||
std::vector<int32_t> operatorIndexes;
|
||||
if (inputType.isSigned()) {
|
||||
// std::vector<std::int64_t> weights_vector{1};
|
||||
auto addIndex = dagBuilder.add_dot(
|
||||
slice(encrypted_inputs), concrete_optimizer::weights::number(1));
|
||||
auto addIndex =
|
||||
dagBuilder.add_dot(slice(encrypted_inputs),
|
||||
concrete_optimizer::weights::number(1), *loc);
|
||||
encrypted_input = addIndex;
|
||||
operatorIndexes.push_back(addIndex.index);
|
||||
}
|
||||
auto lutIndex =
|
||||
dagBuilder.add_lut(encrypted_input, slice(unknowFunction), precision);
|
||||
auto lutIndex = dagBuilder.add_lut(encrypted_input, slice(unknowFunction),
|
||||
precision, *loc);
|
||||
operatorIndexes.push_back(lutIndex.index);
|
||||
mlir::Builder builder(op.getContext());
|
||||
if (setOptimizerID)
|
||||
@@ -229,7 +243,8 @@ struct FunctionToDag {
|
||||
// No need to distinguish different lut kind until we do approximate
|
||||
// paradigm on outputs
|
||||
auto encrypted_input = encrypted_inputs[0];
|
||||
index[val] = dagBuilder.add_round_op(encrypted_input, rounded_precision);
|
||||
index[val] = dagBuilder.add_round_op(encrypted_input, rounded_precision,
|
||||
*loc_to_location(val.getLoc()));
|
||||
return index[val];
|
||||
}
|
||||
|
||||
@@ -238,7 +253,8 @@ struct FunctionToDag {
|
||||
int new_precision) {
|
||||
assert(encrypted_inputs.size() == 1);
|
||||
auto encrypted_input = encrypted_inputs[0];
|
||||
index[val] = dagBuilder.add_unsafe_cast_op(encrypted_input, new_precision);
|
||||
index[val] = dagBuilder.add_unsafe_cast_op(encrypted_input, new_precision,
|
||||
*loc_to_location(val.getLoc()));
|
||||
return index[val];
|
||||
}
|
||||
|
||||
@@ -247,8 +263,8 @@ struct FunctionToDag {
|
||||
std::vector<std::int64_t> &weights_vector) {
|
||||
assert(encrypted_inputs.size() == 1);
|
||||
auto weights = concrete_optimizer::weights::vector(slice(weights_vector));
|
||||
index[val] =
|
||||
dagBuilder.add_dot(slice(encrypted_inputs), std::move(weights));
|
||||
index[val] = dagBuilder.add_dot(slice(encrypted_inputs), std::move(weights),
|
||||
*loc_to_location(val.getLoc()));
|
||||
return index[val];
|
||||
}
|
||||
|
||||
@@ -256,32 +272,110 @@ struct FunctionToDag {
|
||||
std::string loc;
|
||||
llvm::raw_string_ostream loc_stream(loc);
|
||||
location.print(loc_stream);
|
||||
loc.erase(0, 4); // erase the loc( prefix
|
||||
loc.pop_back(); // erase the ) postfix
|
||||
loc.erase(std::remove(loc.begin(), loc.end(), '"'),
|
||||
loc.end()); // erase the " characters
|
||||
return loc;
|
||||
}
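
For reference, the string cleanup above (strip the loc( wrapper and the quotes from a printed MLIR location) in a few lines of Python; a behavioural sketch, not the compiler's code:

    def strip_mlir_loc(printed: str) -> str:
        # An MLIR location prints as: loc("file.py":3:10)
        if printed.startswith("loc(") and printed.endswith(")"):
            printed = printed[4:-1]          # drop the loc( prefix and ) suffix
        return printed.replace('"', "")      # drop the quote characters

    assert strip_mlir_loc('loc("main.py":3:10)') == "main.py:3:10"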
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex
|
||||
addZeroNoise(concretelang::FHE::ZeroNoise &op) {
|
||||
auto val = op->getOpResult(0);
|
||||
auto outShape = getShape(val);
|
||||
auto loc = loc_to_location(op.getLoc());
|
||||
|
||||
// Trivial encrypted constants encoding
|
||||
// There are converted to input + levelledop
|
||||
auto precision = fhe::utils::getEintPrecision(val);
|
||||
auto opI = dagBuilder.add_input(precision, slice(outShape), *loc);
|
||||
auto inputs = Inputs{opI};
|
||||
|
||||
// Default complexity is negligible
|
||||
double const fixedCost = NEGLIGIBLE_COMPLEXITY;
|
||||
double const lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;
|
||||
auto comment = std::string(op->getName().getStringRef()) + " " +
|
||||
loc_to_string(op.getLoc());
|
||||
auto weights = std::vector<double>{1.};
|
||||
index[val] = dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor,
|
||||
fixedCost, slice(weights),
|
||||
slice(outShape), comment, *loc);
|
||||
return index[val];
|
||||
}
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex
|
||||
addAdditiveNoise(concretelang::FHE::AdditiveNoise &op, Inputs &inputs) {
|
||||
auto val = op->getResult(0);
|
||||
auto out_shape = getShape(val);
|
||||
// Default complexity is negligible
|
||||
double fixed_cost = NEGLIGIBLE_COMPLEXITY;
|
||||
double lwe_dim_cost_factor = NEGLIGIBLE_COMPLEXITY;
|
||||
auto comment = std::string(op->getName().getStringRef()) + " " +
|
||||
loc_to_string(op.getLoc());
|
||||
auto loc = loc_to_location(op.getLoc());
|
||||
auto weights = std::vector<double>(inputs.size(), 1.);
|
||||
index[val] = dagBuilder.add_levelled_op(slice(inputs), lwe_dim_cost_factor,
|
||||
fixed_cost, slice(weights),
|
||||
slice(out_shape), comment, *loc);
|
||||
return index[val];
|
||||
}
|
||||
|
||||
rust::Box<concrete_optimizer::Location>
|
||||
loc_to_location(mlir::Location location) {
|
||||
return location_from_string(loc_to_string(location));
|
||||
}
|
||||
concrete_optimizer::dag::OperatorIndex addLevelledOp(mlir::Operation &op,
|
||||
Inputs &inputs) {
|
||||
auto val = op.getResult(0);
|
||||
auto out_shape = getShape(val);
|
||||
auto loc = loc_to_location(op.getLoc());
|
||||
if (inputs.empty()) {
|
||||
// Trivial encrypted constants encoding
|
||||
// There are converted to input + levelledop
|
||||
auto precision = fhe::utils::getEintPrecision(val);
|
||||
auto opI = dagBuilder.add_input(precision, slice(out_shape));
|
||||
auto opI = dagBuilder.add_input(precision, slice(out_shape), *loc);
|
||||
inputs.push_back(opI);
|
||||
}
|
||||
// Default complexity is negligible
|
||||
double fixed_cost = NEGLIGIBLE_COMPLEXITY;
|
||||
double lwe_dim_cost_factor = NEGLIGIBLE_COMPLEXITY;
|
||||
auto smanp_int = op.getAttrOfType<mlir::IntegerAttr>("SMANP");
|
||||
auto loc = loc_to_string(op.getLoc());
|
||||
assert(smanp_int && "Missing manp value on a crypto operation");
|
||||
// TODO: use APIFloat.sqrt when it's available
|
||||
double manp = sqrt(smanp_int.getValue().roundToDouble());
|
||||
auto comment = std::string(op.getName().getStringRef()) + " " + loc;
|
||||
index[val] =
|
||||
dagBuilder.add_levelled_op(slice(inputs), lwe_dim_cost_factor,
|
||||
fixed_cost, manp, slice(out_shape), comment);
|
||||
auto comment = std::string(op.getName().getStringRef()) + " " +
|
||||
loc_to_string(op.getLoc());
|
||||
|
||||
size_t n_inputs = 0;
|
||||
double sq_sum = 0;
|
||||
for (auto input : op.getOperands()) {
|
||||
if (!fhe::utils::isEncryptedValue(input)) {
|
||||
continue;
|
||||
}
|
||||
n_inputs += 1;
|
||||
if (input.isa<mlir::BlockArgument>()) {
|
||||
sq_sum += 1.0;
|
||||
} else {
|
||||
auto inpSmanpInt =
|
||||
input.getDefiningOp()->getAttrOfType<mlir::IntegerAttr>("SMANP");
|
||||
const double inpSManp = inpSmanpInt.getValue().roundToDouble();
|
||||
sq_sum += inpSManp;
|
||||
}
|
||||
}
|
||||
assert(inputs.size() == n_inputs);
|
||||
double weight;
|
||||
if (sq_sum == 0) {
|
||||
// The max input manp is zero, meaning the inputs are all zero tensors
|
||||
// with no noise. In this case it does not matter the weight since it will
|
||||
// multiply zero.
|
||||
weight = 1.;
|
||||
} else {
|
||||
double smanp_dbl = smanp_int.getValue().roundToDouble();
|
||||
weight = std::max(sqrt(smanp_dbl / sq_sum), 1.0);
|
||||
assert(!std::isnan(weight));
|
||||
}
|
||||
auto weights = std::vector<double>(n_inputs, weight);
|
||||
index[val] = dagBuilder.add_levelled_op(slice(inputs), lwe_dim_cost_factor,
|
||||
fixed_cost, slice(weights),
|
||||
slice(out_shape), comment, *loc);
|
||||
return index[val];
|
||||
}
|
||||
|
||||
@@ -297,17 +391,19 @@ struct FunctionToDag {
assert(encrypted_inputs.size() == 1);
auto input = lsbOp.getInput();
auto result = lsbOp.getResult();
auto loc = loc_to_location(lsbOp.getLoc());
auto input_precision = fhe::utils::getEintPrecision(input);
auto output_precision = fhe::utils::getEintPrecision(result);
auto lsb_shiffted_as_1bit_wop = dagBuilder.add_dot(
slice(encrypted_inputs),
concrete_optimizer::weights::number(1 << input_precision));
concrete_optimizer::weights::number(1 << input_precision), *loc);
std::vector<std::uint64_t> unknownFunction;
auto overflow_bit_precision = 0;
auto lsb_as_0_bits = dagBuilder.add_unsafe_cast_op(
lsb_shiffted_as_1bit_wop, overflow_bit_precision); // id for rotation
auto lsb_as_0_bits = dagBuilder.add_unsafe_cast_op(lsb_shiffted_as_1bit_wop,
overflow_bit_precision,
*loc); // id for rotation
auto lsb_result = dagBuilder.add_lut(lsb_as_0_bits, slice(unknownFunction),
output_precision);
output_precision, *loc);
auto lsb_result_corrected = idPlaceholder(lsb_result);
index[result] = lsb_result_corrected;

@@ -336,46 +432,23 @@ struct FunctionToDag {
mlir::Value result = mulOp.getResult();
const std::vector<uint64_t> resultShape = getShape(result);

Operation *xOp = mulOp.getLhs().getDefiningOp();
Operation *yOp = mulOp.getRhs().getDefiningOp();

const double fixedCost = NEGLIGIBLE_COMPLEXITY;
const double lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;

llvm::APInt xSmanp = llvm::APInt{1, 1, false};
if (xOp != nullptr) {
const auto xSmanpAttr = xOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(xSmanpAttr && "Missing SMANP value on a crypto operation");
xSmanp = xSmanpAttr.getValue();
}

llvm::APInt ySmanp = llvm::APInt{1, 1, false};
if (yOp != nullptr) {
const auto ySmanpAttr = yOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
assert(ySmanpAttr && "Missing SMANP value on a crypto operation");
ySmanp = ySmanpAttr.getValue();
}

auto loc = loc_to_string(mulOp.getLoc());
auto comment = std::string(mulOp->getName().getStringRef()) + " " + loc;

// (x + y) and (x - y)
const double addSubManp =
sqrt(xSmanp.roundToDouble() + ySmanp.roundToDouble());

// tlu(v)
const double tluManp = 1;

// tlu(v1) - tlu(v2)
const double tluSubManp = sqrt(tluManp + tluManp);
auto comment = std::string(mulOp->getName().getStringRef()) + " " +
loc_to_string(mulOp.getLoc());
auto loc = loc_to_location(mulOp.getLoc());

// for tlus
const std::vector<std::uint64_t> unknownFunction;

// tlu(x + y)
auto addNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
addSubManp, slice(resultShape), comment);

auto addWeights = std::vector<double>{1, 1};
auto addNode = dagBuilder.add_levelled_op(
slice(inputs), lweDimCostFactor, fixedCost, slice(addWeights),
slice(resultShape), comment, *loc);

std::optional<concrete_optimizer::dag::OperatorIndex> lhsCorrectionNode;
if (isSignedEint(mulOp.getType())) {
// If signed mul we need to add the addition node for correction of the
@@ -383,31 +456,37 @@ struct FunctionToDag {
addNode = dagBuilder.add_dot(
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{addNode}),
concrete_optimizer::weights::vector(
slice(std::vector<std::int64_t>{1})));
slice(std::vector<std::int64_t>{1})),
*loc);
lhsCorrectionNode = addNode;
}
auto lhsTluNode =
dagBuilder.add_lut(addNode, slice(unknownFunction), precision);
dagBuilder.add_lut(addNode, slice(unknownFunction), precision, *loc);

// tlu(x - y)
auto subNode =
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
addSubManp, slice(resultShape), comment);
auto subWeights = std::vector<double>{1, 1};
auto subNode = dagBuilder.add_levelled_op(
slice(inputs), lweDimCostFactor, fixedCost, slice(subWeights),
slice(resultShape), comment, *loc);

// This is a signed tlu so we need to also add the addition for correction
// signed tlu
auto rhsCorrectionNode = dagBuilder.add_dot(
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{subNode}),
concrete_optimizer::weights::vector(
slice(std::vector<std::int64_t>{1})));
auto rhsTluNode = dagBuilder.add_lut(rhsCorrectionNode,
slice(unknownFunction), precision);
slice(std::vector<std::int64_t>{1})),
*loc);
auto rhsTluNode = dagBuilder.add_lut(
rhsCorrectionNode, slice(unknownFunction), precision, *loc);

// tlu(x + y) - tlu(x - y)
auto resultWeights = std::vector<double>{1, 1};
const std::vector<concrete_optimizer::dag::OperatorIndex> subInputs = {
lhsTluNode, rhsTluNode};
auto resultNode = dagBuilder.add_levelled_op(
slice(subInputs), lweDimCostFactor, fixedCost, tluSubManp,
slice(resultShape), comment);
slice(subInputs), lweDimCostFactor, fixedCost, slice(resultWeights),
slice(resultShape), comment, *loc);

index[result] = resultNode;

mlir::Builder builder(mulOp.getContext());
@@ -430,6 +509,7 @@ struct FunctionToDag {
|
||||
addTensorInnerProductEncEnc(InnerProductOp &innerProductOp, Inputs &inputs,
|
||||
int precision) {
|
||||
mlir::Value result = innerProductOp.getResult();
|
||||
auto loc = loc_to_location(innerProductOp.getLoc());
|
||||
const std::vector<uint64_t> resultShape = getShape(result);
|
||||
|
||||
// We assume a first tensorized matmul step
|
||||
@@ -512,34 +592,11 @@ struct FunctionToDag {
|
||||
|
||||
// 1. (x + y) and (x - y) -> supposing broadcasting is used
|
||||
// to tensorize this operation
|
||||
|
||||
Operation *xOp = innerProductOp.getLhs().getDefiningOp();
|
||||
Operation *yOp = innerProductOp.getRhs().getDefiningOp();
|
||||
|
||||
const double fixedCost = NEGLIGIBLE_COMPLEXITY;
|
||||
const double lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;
|
||||
|
||||
llvm::APInt xSmanp = llvm::APInt{1, 1, false};
|
||||
if (xOp != nullptr) {
|
||||
const auto xSmanpAttr = xOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
|
||||
assert(xSmanpAttr && "Missing SMANP value on a crypto operation");
|
||||
xSmanp = xSmanpAttr.getValue();
|
||||
}
|
||||
|
||||
llvm::APInt ySmanp = llvm::APInt{1, 1, false};
|
||||
if (yOp != nullptr) {
|
||||
const auto ySmanpAttr = yOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
|
||||
assert(ySmanpAttr && "Missing SMANP value on a crypto operation");
|
||||
ySmanp = ySmanpAttr.getValue();
|
||||
}
|
||||
|
||||
auto loc = loc_to_string(innerProductOp.getLoc());
|
||||
auto comment =
|
||||
std::string(innerProductOp->getName().getStringRef()) + " " + loc;
|
||||
|
||||
// (x + y) and (x - y)
|
||||
const double addSubManp =
|
||||
sqrt(xSmanp.roundToDouble() + ySmanp.roundToDouble());
|
||||
auto comment = std::string(innerProductOp->getName().getStringRef()) + " " +
|
||||
loc_to_string(innerProductOp.getLoc());
|
||||
|
||||
// tlu(v)
|
||||
const double tluManp = 1;
|
||||
@@ -551,9 +608,11 @@ struct FunctionToDag {
|
||||
const std::vector<std::uint64_t> unknownFunction;
|
||||
|
||||
// tlu(x + y)
|
||||
auto addNode =
|
||||
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
|
||||
addSubManp, slice(pairMatrixShape), comment);
|
||||
auto addWeights = std::vector<double>{1, 1};
|
||||
auto addNode = dagBuilder.add_levelled_op(
|
||||
slice(inputs), lweDimCostFactor, fixedCost, slice(addWeights),
|
||||
slice(pairMatrixShape), comment, *loc);
|
||||
|
||||
std::optional<concrete_optimizer::dag::OperatorIndex> lhsCorrectionNode;
|
||||
if (isSignedEint(innerProductOp.getType())) {
|
||||
// If signed mul we need to add the addition node for correction of the
|
||||
@@ -561,31 +620,36 @@ struct FunctionToDag {
|
||||
addNode = dagBuilder.add_dot(
|
||||
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{addNode}),
|
||||
concrete_optimizer::weights::vector(
|
||||
slice(std::vector<std::int64_t>{1})));
|
||||
slice(std::vector<std::int64_t>{1})),
|
||||
*loc);
|
||||
lhsCorrectionNode = addNode;
|
||||
}
|
||||
auto lhsTluNode =
|
||||
dagBuilder.add_lut(addNode, slice(unknownFunction), precision);
|
||||
dagBuilder.add_lut(addNode, slice(unknownFunction), precision, *loc);
|
||||
|
||||
// tlu(x - y)
|
||||
auto subNode =
|
||||
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
|
||||
addSubManp, slice(pairMatrixShape), comment);
|
||||
auto subWeights = std::vector<double>{1, 1};
|
||||
auto subNode = dagBuilder.add_levelled_op(
|
||||
slice(inputs), lweDimCostFactor, fixedCost, slice(subWeights),
|
||||
slice(pairMatrixShape), comment, *loc);
|
||||
|
||||
// This is a signed tlu so we need to also add the addition for correction
|
||||
// signed tlu
|
||||
auto rhsCorrectionNode = dagBuilder.add_dot(
|
||||
slice(std::vector<concrete_optimizer::dag::OperatorIndex>{subNode}),
|
||||
concrete_optimizer::weights::vector(
|
||||
slice(std::vector<std::int64_t>{1})));
|
||||
auto rhsTluNode = dagBuilder.add_lut(rhsCorrectionNode,
|
||||
slice(unknownFunction), precision);
|
||||
slice(std::vector<std::int64_t>{1})),
|
||||
*loc);
|
||||
auto rhsTluNode = dagBuilder.add_lut(
|
||||
rhsCorrectionNode, slice(unknownFunction), precision, *loc);
|
||||
|
||||
// tlu(x + y) - tlu(x - y)
|
||||
auto resultWeights = std::vector<double>{1, 1};
|
||||
const std::vector<concrete_optimizer::dag::OperatorIndex> subInputs = {
|
||||
lhsTluNode, rhsTluNode};
|
||||
auto resultNode = dagBuilder.add_levelled_op(
|
||||
slice(subInputs), lweDimCostFactor, fixedCost, tluSubManp,
|
||||
slice(pairMatrixShape), comment);
|
||||
slice(subInputs), lweDimCostFactor, fixedCost, slice(resultWeights),
|
||||
slice(pairMatrixShape), comment, *loc);
|
||||
|
||||
// 3. Sum(tlu(x + y) - tlu(x - y))
|
||||
// Create a leveled op that simulates concatenation. It takes
|
||||
@@ -606,9 +670,10 @@ struct FunctionToDag {
|
||||
|
||||
// TODO: use APIFloat.sqrt when it's available
|
||||
double manp = sqrt(smanp_int.getValue().roundToDouble());
|
||||
auto weights = std::vector<double>(sumOperands.size(), manp / tluSubManp);
|
||||
index[result] = dagBuilder.add_levelled_op(
|
||||
slice(sumOperands), lwe_dim_cost_factor, fixed_cost, manp,
|
||||
slice(resultShape), comment);
|
||||
slice(sumOperands), lwe_dim_cost_factor, fixed_cost, slice(weights),
|
||||
slice(resultShape), comment, *loc);
|
||||
|
||||
// Create the TFHE.OId attributes
|
||||
// The first elements of the vector are nodes for the encrypted
|
||||
@@ -647,49 +712,31 @@ struct FunctionToDag {
|
||||
|
||||
void addMax(FHE::MaxEintOp &maxOp, Inputs &inputs, int precision) {
|
||||
mlir::Value result = maxOp.getResult();
|
||||
auto loc = loc_to_location(maxOp.getLoc());
|
||||
const std::vector<uint64_t> resultShape = getShape(result);
|
||||
|
||||
Operation *xOp = maxOp.getX().getDefiningOp();
|
||||
Operation *yOp = maxOp.getY().getDefiningOp();
|
||||
|
||||
const double fixedCost = NEGLIGIBLE_COMPLEXITY;
|
||||
const double lweDimCostFactor = NEGLIGIBLE_COMPLEXITY;
|
||||
|
||||
llvm::APInt xSmanp = llvm::APInt{1, 1, false};
|
||||
if (xOp != nullptr) {
|
||||
const auto xSmanpAttr = xOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
|
||||
assert(xSmanpAttr && "Missing SMANP value on a crypto operation");
|
||||
xSmanp = xSmanpAttr.getValue();
|
||||
}
|
||||
auto comment = std::string(maxOp->getName().getStringRef()) + " " +
|
||||
loc_to_string(maxOp.getLoc());
|
||||
|
||||
llvm::APInt ySmanp = llvm::APInt{1, 1, false};
|
||||
if (yOp != nullptr) {
|
||||
const auto ySmanpAttr = yOp->getAttrOfType<mlir::IntegerAttr>("SMANP");
|
||||
assert(ySmanpAttr && "Missing SMANP value on a crypto operation");
|
||||
ySmanp = ySmanpAttr.getValue();
|
||||
}
|
||||
auto subWeights = std::vector<double>{1, 1};
|
||||
auto subNode = dagBuilder.add_levelled_op(
|
||||
slice(inputs), lweDimCostFactor, fixedCost, slice(subWeights),
|
||||
slice(resultShape), comment, *loc);
|
||||
|
||||
const double subManp =
|
||||
sqrt(xSmanp.roundToDouble() + ySmanp.roundToDouble());
|
||||
|
||||
auto loc = loc_to_string(maxOp.getLoc());
|
||||
auto comment = std::string(maxOp->getName().getStringRef()) + " " + loc;
|
||||
|
||||
auto subNode =
|
||||
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
|
||||
subManp, slice(resultShape), comment);
|
||||
|
||||
const double tluNodeManp = 1;
|
||||
const std::vector<std::uint64_t> unknownFunction;
|
||||
auto tluNode =
|
||||
dagBuilder.add_lut(subNode, slice(unknownFunction), precision);
|
||||
dagBuilder.add_lut(subNode, slice(unknownFunction), precision, *loc);
|
||||
|
||||
const double addManp = sqrt(tluNodeManp + ySmanp.roundToDouble());
|
||||
const std::vector<concrete_optimizer::dag::OperatorIndex> addInputs = {
|
||||
tluNode, inputs[1]};
|
||||
auto addWeights = std::vector<double>{1, 1};
|
||||
auto resultNode = dagBuilder.add_levelled_op(
|
||||
slice(addInputs), lweDimCostFactor, fixedCost, addManp,
|
||||
slice(resultShape), comment);
|
||||
slice(addInputs), lweDimCostFactor, fixedCost, slice(addWeights),
|
||||
slice(resultShape), comment, *loc);
|
||||
|
||||
index[result] = resultNode;
|
||||
|
||||
// Set attribute on the MLIR node
|
||||
@@ -705,6 +752,7 @@ struct FunctionToDag {
|
||||
int precision) {
|
||||
mlir::Value result = maxpool2dOp.getResult();
|
||||
const std::vector<uint64_t> resultShape = getShape(result);
|
||||
auto loc = loc_to_location(maxpool2dOp.getLoc());
|
||||
|
||||
// all TLUs are flattened into a dimension
|
||||
// to create a single TLU node in optimizer dag
|
||||
@@ -732,25 +780,29 @@ struct FunctionToDag {
|
||||
|
||||
const double subManp = sqrt(2 * inputSmanp.roundToDouble() + 1);
|
||||
|
||||
auto loc = loc_to_string(maxpool2dOp.getLoc());
|
||||
auto comment =
|
||||
std::string(maxpool2dOp->getName().getStringRef()) + " " + loc;
|
||||
auto comment = std::string(maxpool2dOp->getName().getStringRef()) + " " +
|
||||
loc_to_string(maxpool2dOp.getLoc());
|
||||
|
||||
auto subNode =
|
||||
dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor, fixedCost,
|
||||
subManp, slice(fakeShape), comment);
|
||||
auto subWeights = std::vector<double>(
|
||||
inputs.size(), subManp / sqrt(inputSmanp.roundToDouble()));
|
||||
auto subNode = dagBuilder.add_levelled_op(slice(inputs), lweDimCostFactor,
|
||||
fixedCost, slice(subWeights),
|
||||
slice(fakeShape), comment, *loc);
|
||||
|
||||
const std::vector<std::uint64_t> unknownFunction;
|
||||
auto tluNode =
|
||||
dagBuilder.add_lut(subNode, slice(unknownFunction), precision);
|
||||
dagBuilder.add_lut(subNode, slice(unknownFunction), precision, *loc);
|
||||
|
||||
const double addManp = sqrt(inputSmanp.roundToDouble() + 1);
|
||||
const std::vector<concrete_optimizer::dag::OperatorIndex> addInputs = {
|
||||
tluNode, inputs[0]};
|
||||
|
||||
auto resultWeights = std::vector<double>(
|
||||
addInputs.size(), addManp / sqrt(inputSmanp.roundToDouble()));
|
||||
auto resultNode = dagBuilder.add_levelled_op(
|
||||
slice(addInputs), lweDimCostFactor, fixedCost, addManp,
|
||||
slice(resultShape), comment);
|
||||
slice(addInputs), lweDimCostFactor, fixedCost, slice(resultWeights),
|
||||
slice(resultShape), comment, *loc);
|
||||
|
||||
index[result] = resultNode;
|
||||
// Set attribute on the MLIR node
|
||||
mlir::Builder builder(maxpool2dOp.getContext());
|
||||
@@ -771,7 +823,8 @@ struct FunctionToDag {
|
||||
idPlaceholder(concrete_optimizer::dag::OperatorIndex input) {
|
||||
std::vector inputs = {input};
|
||||
return dagBuilder.add_dot(slice(inputs),
|
||||
concrete_optimizer::weights::number(1));
|
||||
concrete_optimizer::weights::number(1),
|
||||
*location_unknown());
|
||||
}
|
||||
|
||||
Inputs encryptedInputs(mlir::Operation &op) {
|
||||
@@ -852,6 +905,14 @@ struct FunctionToDag {
|
||||
return llvm::dyn_cast<mlir::concretelang::FHELinalg::MulEintOp>(op);
|
||||
}
|
||||
|
||||
mlir::concretelang::FHE::ZeroNoise asZeroNoise(mlir::Operation &op) {
|
||||
return llvm::dyn_cast<mlir::concretelang::FHE::ZeroNoise>(op);
|
||||
}
|
||||
|
||||
mlir::concretelang::FHE::AdditiveNoise asAdditiveNoise(mlir::Operation &op) {
|
||||
return llvm::dyn_cast<mlir::concretelang::FHE::AdditiveNoise>(op);
|
||||
}
|
||||
|
||||
mlir::concretelang::FHE::MaxEintOp asMax(mlir::Operation &op) {
|
||||
return llvm::dyn_cast<mlir::concretelang::FHE::MaxEintOp>(op);
|
||||
}
|
||||
|
||||
@@ -794,14 +794,12 @@ public:
std::optional<llvm::APInt>
norm2SqEquivFromOp(Operation *op, ArrayRef<const MANPLattice *> operands) {
std::optional<llvm::APInt> norm2SqEquiv;
if (auto cstNoiseOp =
llvm::dyn_cast<mlir::concretelang::FHE::ConstantNoise>(op)) {
if (llvm::isa<mlir::concretelang::FHE::ZeroEintOp,
mlir::concretelang::FHE::ZeroTensorOp>(op)) {
norm2SqEquiv = llvm::APInt{1, 0, false};
} else {
norm2SqEquiv = llvm::APInt{1, 1, false};
}
if (auto zeroNoiseOp =
llvm::dyn_cast<mlir::concretelang::FHE::ZeroNoise>(op)) {
norm2SqEquiv = llvm::APInt{1, 0, false};
} else if (auto cstNoiseOp =
llvm::dyn_cast<mlir::concretelang::FHE::ConstantNoise>(op)) {
norm2SqEquiv = llvm::APInt{1, 1, false};
} else if (llvm::isa<mlir::concretelang::FHE::ToBoolOp>(op) ||
llvm::isa<mlir::concretelang::FHE::FromBoolOp>(op)) {
norm2SqEquiv = getNoOpSqMANP(operands);
|
||||
|
||||
@@ -306,19 +306,6 @@ static void lowerDataflowTaskOp(RT::DataflowTaskOp DFTOp,
DFTOp.erase();
}

static void registerWorkFunction(mlir::func::FuncOp parentFunc,
mlir::func::FuncOp workFunction) {
OpBuilder builder(parentFunc.getBody());
builder.setInsertionPointToStart(&parentFunc.getBody().front());

auto fnptr = builder.create<mlir::func::ConstantOp>(
parentFunc.getLoc(), workFunction.getFunctionType(),
SymbolRefAttr::get(builder.getContext(), workFunction.getName()));

builder.create<RT::RegisterTaskWorkFunctionOp>(parentFunc.getLoc(),
fnptr.getResult());
}

static func::FuncOp getCalledFunction(CallOpInterface callOp) {
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
if (!sym)
@@ -333,8 +320,6 @@ struct LowerDataflowTasksPass

void runOnOperation() override {
auto module = getOperation();
SmallVector<func::FuncOp, 4> workFunctions;
SmallVector<func::FuncOp, 1> entryPoints;

module.walk([&](mlir::func::FuncOp func) {
static int wfn_id = 0;
@@ -357,58 +342,12 @@ struct LowerDataflowTasksPass
outliningMap.push_back(
std::pair<RT::DataflowTaskOp, func::FuncOp>(op, outlinedFunc));
symbolTable.insert(outlinedFunc);
workFunctions.push_back(outlinedFunc);
return WalkResult::advance();
});
// Lower the DF task ops to RT dialect ops.
for (auto mapping : outliningMap)
lowerDataflowTaskOp(mapping.first, mapping.second);

// Gather all entry points (assuming no recursive calls to entry points)
// Main is always an entry-point - otherwise check if this
// function is called within the module. TODO: we assume no
// recursion.
if (func.getName() == "main")
entryPoints.push_back(func);
else {
bool found = false;
module.walk([&](mlir::func::CallOp op) {
if (getCalledFunction(op) == func)
found = true;
});
if (!found)
entryPoints.push_back(func);
}
});

for (auto entryPoint : entryPoints) {
// If this is a JIT invocation and we're not on the root node,
// we do not need to do any computation, only register all work
// functions with the runtime system
if (!workFunctions.empty()) {
if (!dfr::_dfr_is_root_node()) {
entryPoint.eraseBody();
Block *b = new Block;
FunctionType funTy = entryPoint.getFunctionType();
SmallVector<Location> locations(funTy.getInputs().size(),
entryPoint.getLoc());
b->addArguments(funTy.getInputs(), locations);
entryPoint.getBody().push_front(b);
for (int i = funTy.getNumInputs() - 1; i >= 0; --i)
entryPoint.eraseArgument(i);
for (int i = funTy.getNumResults() - 1; i >= 0; --i)
entryPoint.eraseResult(i);
OpBuilder builder(entryPoint.getBody());
builder.setInsertionPointToEnd(&entryPoint.getBody().front());
builder.create<mlir::func::ReturnOp>(entryPoint.getLoc());
}
}

// Generate code to register all work-functions with the
// runtime.
for (auto wf : workFunctions)
registerWorkFunction(entryPoint, wf);
}
}
LowerDataflowTasksPass(bool debug) : debug(debug){};
|
||||
|
||||
@@ -428,29 +367,27 @@ struct StartStopPass : public StartStopBase<StartStopPass> {

void runOnOperation() override {
auto module = getOperation();
int useDFR = 0;
SmallVector<func::FuncOp, 1> entryPoints;

// Gather all entry points in the module.
module.walk([&](mlir::func::FuncOp func) {
// Do not add start/stop to work functions - but if any are
// present, then we need to activate the runtime
if (func->getAttr("_dfr_work_function_attribute")) {
useDFR = 1;
} else {
// Main is always an entry-point - otherwise check if this
// function is called within the module. TODO: we assume no
// recursion.
if (func.getName() == "main")
// Work functions are never allowed to be an entry point.
if (func->getAttr("_dfr_work_function_attribute"))
return;

// Main is always an entry-point - otherwise check if this
// function is called within the module. TODO: we assume no
// recursion.
if (func.getName() == "main")
entryPoints.push_back(func);
else {
bool found = false;
module.walk([&](mlir::func::CallOp op) {
if (getCalledFunction(op) == func)
found = true;
});
if (!found)
entryPoints.push_back(func);
else {
bool found = false;
module.walk([&](mlir::func::CallOp op) {
if (getCalledFunction(op) == func)
found = true;
});
if (!found)
entryPoints.push_back(func);
}
}
});

@@ -459,7 +396,7 @@ struct StartStopPass : public StartStopBase<StartStopPass> {
OpBuilder builder(entryPoint.getBody());
builder.setInsertionPointToStart(&entryPoint.getBody().front());
Value useDFRVal = builder.create<arith::ConstantOp>(
entryPoint.getLoc(), builder.getI64IntegerAttr(useDFR));
entryPoint.getLoc(), builder.getI64IntegerAttr(1));

// Check if this entry point uses a context
Value ctx = nullptr;
|
||||
|
||||
@@ -2,12 +2,14 @@ add_mlir_dialect_library(
ConcretelangSDFGTransforms
BufferizableOpInterfaceImpl.cpp
SDFGConvertibleOpInterfaceImpl.cpp
SDFGBufferOwnership.cpp
ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/concretelang/Dialect/Concrete
${PROJECT_SOURCE_DIR}/include/concretelang/Dialect/SDFG
DEPENDS
mlir-headers
SDFGDialect
ConcretelangSDFGTransformsPassIncGen
ConcretelangSDFGInterfaces
LINK_LIBS
PUBLIC
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#include "mlir/Dialect/Arith/IR/Arith.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
|
||||
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/SCF/IR/SCF.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/IR/Dialect.h"
|
||||
#include "mlir/IR/Dominance.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
|
||||
#include "concretelang/Conversion/Tools.h"
|
||||
#include "concretelang/Dialect/Concrete/IR/ConcreteDialect.h"
|
||||
#include "concretelang/Dialect/Concrete/IR/ConcreteOps.h"
|
||||
#include "concretelang/Dialect/SDFG/IR/SDFGDialect.h"
|
||||
#include "concretelang/Dialect/SDFG/IR/SDFGOps.h"
|
||||
#include "concretelang/Dialect/SDFG/IR/SDFGTypes.h"
|
||||
#include "concretelang/Dialect/SDFG/Transforms/BufferizableOpInterfaceImpl.h"
|
||||
#include "concretelang/Support/CompilerEngine.h"
|
||||
#include <concretelang/Dialect/SDFG/Transforms/Passes.h>
|
||||
#include <mlir/IR/AffineExpr.h>
|
||||
#include <mlir/IR/AffineMap.h>
|
||||
#include <mlir/IR/BuiltinTypes.h>
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::bufferization;
|
||||
using namespace mlir::tensor;
|
||||
|
||||
namespace SDFG = mlir::concretelang::SDFG;
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
namespace {
|
||||
|
||||
static void getAliasedUses(Value val, DenseSet<OpOperand *> &aliasedUses) {
|
||||
for (auto &use : val.getUses()) {
|
||||
aliasedUses.insert(&use);
|
||||
if (dyn_cast<ViewLikeOpInterface>(use.getOwner()))
|
||||
getAliasedUses(use.getOwner()->getResult(0), aliasedUses);
|
||||
}
|
||||
}
|
||||
|
||||
static func::FuncOp getCalledFunction(CallOpInterface callOp) {
|
||||
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
|
||||
if (!sym)
|
||||
return nullptr;
|
||||
return dyn_cast_or_null<func::FuncOp>(
|
||||
SymbolTable::lookupNearestSymbolFrom(callOp, sym));
|
||||
}
|
||||
|
||||
struct SDFGBufferOwnershipPass
|
||||
: public SDFGBufferOwnershipBase<SDFGBufferOwnershipPass> {
|
||||
|
||||
void runOnOperation() override {
|
||||
auto module = getOperation();
|
||||
std::vector<Operation *> deallocOps;
|
||||
|
||||
// Find all SDFG put operations that use a buffer only used for
|
||||
// this operation, then deallocated. In such cases there is no
|
||||
// need to copy the data again in the runtime and we can take
|
||||
// ownership of the buffer instead, removing the deallocation and
|
||||
// allowing the runtime to deallocate when appropriate.
|
||||
module.walk([&](mlir::memref::DeallocOp op) {
|
||||
DominanceInfo domInfo(op);
|
||||
Value alloc = op.getOperand();
|
||||
DenseSet<OpOperand *> aliasedUses;
|
||||
getAliasedUses(alloc, aliasedUses);
|
||||
|
||||
// Check if this memref is used in a SDFG put operation
|
||||
for (auto use : aliasedUses) {
|
||||
if (isa<mlir::func::CallOp>(use->getOwner())) {
|
||||
mlir::func::CallOp callOp = cast<func::CallOp>(use->getOwner());
|
||||
mlir::func::FuncOp funcOp = getCalledFunction(callOp);
|
||||
std::string putName = "stream_emulator_put_memref";
|
||||
if (funcOp.getName().str().compare(0, putName.size(), putName) == 0) {
|
||||
// If the put operation dominates the deallocation, then
|
||||
// ownership of the data can be transferred to the runtime
|
||||
// and deallocation can be removed. We mark the ownership
|
||||
// flag in the PUT operation to notify the runtime that it
|
||||
// gets ownership.
|
||||
if (domInfo.properlyDominates(callOp, op)) {
|
||||
deallocOps.push_back(op);
|
||||
OpBuilder builder(callOp);
|
||||
mlir::Value cst1 = builder.create<mlir::arith::ConstantOp>(
|
||||
callOp.getLoc(), builder.getI64IntegerAttr(1));
|
||||
callOp->setOperand(2, cst1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
for (auto dop : deallocOps) {
|
||||
dop->erase();
|
||||
}
|
||||
}
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
std::unique_ptr<mlir::Pass> createSDFGBufferOwnershipPass() {
|
||||
return std::make_unique<SDFGBufferOwnershipPass>();
|
||||
}
|
||||
|
||||
} // end namespace concretelang
|
||||
} // end namespace mlir
|
||||
@@ -997,6 +997,9 @@ public:
return mlir::failure();
}

if (oldOp->getAttr("_dfr_work_function_attribute"))
newOp->setAttr("_dfr_work_function_attribute", rewriter.getUnitAttr());

return mlir::success();
}
|
||||
|
||||
|
||||
@@ -1,12 +1,28 @@
add_compile_options(-fsized-deallocation)

if(CONCRETELANG_CUDA_SUPPORT)
add_library(ConcretelangRuntime SHARED context.cpp simulation.cpp wrappers.cpp DFRuntime.cpp key_manager.cpp
GPUDFG.cpp)
add_library(
ConcretelangRuntime SHARED
context.cpp
utils.cpp
simulation.cpp
wrappers.cpp
DFRuntime.cpp
key_manager.cpp
GPUDFG.cpp
time_util.cpp)
target_link_libraries(ConcretelangRuntime PRIVATE hwloc)
else()
add_library(ConcretelangRuntime SHARED context.cpp simulation.cpp wrappers.cpp DFRuntime.cpp key_manager.cpp
StreamEmulator.cpp)
add_library(
ConcretelangRuntime SHARED
context.cpp
utils.cpp
simulation.cpp
wrappers.cpp
DFRuntime.cpp
key_manager.cpp
GPUDFG.cpp
time_util.cpp)
endif()

add_dependencies(ConcretelangRuntime concrete_cpu concrete_cpu_noise_model concrete-protocol)
|
||||
|
||||
@@ -36,6 +36,10 @@ static size_t num_nodes = 0;
|
||||
static struct timespec init_timer, broadcast_timer, compute_timer, whole_timer;
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
void *dl_handle = nullptr;
|
||||
WorkFunctionRegistry *_dfr_node_level_work_function_registry;
|
||||
|
||||
} // namespace dfr
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
@@ -99,7 +103,6 @@ void _dfr_create_async_task(wfnptr wfn, void *ctx, size_t num_params,
|
||||
param_types.push_back(va_arg(args, uint64_t));
|
||||
}
|
||||
va_end(args);
|
||||
|
||||
dfr_create_async_task_impl(wfn, ctx, refcounted_futures, param_sizes,
|
||||
param_types, outputs, output_sizes, output_types);
|
||||
}
|
||||
@@ -179,6 +182,7 @@ bool _dfr_is_jit() { return is_jit_p; }
|
||||
bool _dfr_is_root_node() { return is_root_node_p; }
|
||||
bool _dfr_use_omp() { return use_omp_p; }
|
||||
bool _dfr_is_distributed() { return num_nodes > 1; }
|
||||
void _dfr_register_lib(void *dlh) { dl_handle = dlh; }
|
||||
} // namespace dfr
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
@@ -210,8 +214,10 @@ static inline void _dfr_stop_impl() {
|
||||
}
|
||||
|
||||
static inline void _dfr_start_impl(int argc, char *argv[]) {
|
||||
CONCRETELANG_ENABLE_TIMING();
|
||||
BEGIN_TIME(&init_timer);
|
||||
dl_handle = dlopen(nullptr, RTLD_NOW);
|
||||
if (dl_handle == nullptr)
|
||||
dl_handle = dlopen(nullptr, RTLD_NOW);
|
||||
|
||||
// If OpenMP is to be used, we need to force its initialization
|
||||
// before thread binding occurs. Otherwise OMP threads will be bound
|
||||
@@ -315,7 +321,7 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
|
||||
is_root_node_p = (hpx::find_here() == hpx::find_root_locality());
|
||||
num_nodes = hpx::get_num_localities().get();
|
||||
|
||||
new WorkFunctionRegistry();
|
||||
_dfr_node_level_work_function_registry = new WorkFunctionRegistry();
|
||||
|
||||
char *env = getenv("DFR_LAZY_KEY_TRANSFER");
|
||||
bool lazy = false;
|
||||
@@ -324,7 +330,7 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
|
||||
!strncmp(env, "On", 2) || !strncmp(env, "on", 2) ||
|
||||
!strncmp(env, "1", 1))
|
||||
lazy = true;
|
||||
new RuntimeContextManager(lazy);
|
||||
_dfr_node_level_runtime_context_manager = new RuntimeContextManager(lazy);
|
||||
|
||||
_dfr_jit_phase_barrier = new hpx::distributed::barrier(
|
||||
"phase_barrier", num_nodes, hpx::get_locality_id());
|
||||
@@ -346,6 +352,7 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
|
||||
JIT invocation). These serve to pause/resume the runtime
|
||||
scheduler and to clean up used resources. */
|
||||
void _dfr_start(int64_t use_dfr_p, void *ctx) {
|
||||
CONCRETELANG_ENABLE_TIMING();
|
||||
BEGIN_TIME(&whole_timer);
|
||||
if (use_dfr_p) {
|
||||
// The first invocation will initialise the runtime. As each call to
|
||||
@@ -365,12 +372,10 @@ void _dfr_start(int64_t use_dfr_p, void *ctx) {
|
||||
if (num_nodes > 1) {
|
||||
BEGIN_TIME(&broadcast_timer);
|
||||
_dfr_node_level_runtime_context_manager->setContext(ctx);
|
||||
}
|
||||
if (_dfr_is_root_node())
|
||||
_dfr_startup_barrier->wait();
|
||||
|
||||
if (num_nodes > 1 && ctx) {
|
||||
END_TIME(&broadcast_timer, "Key broadcasting");
|
||||
if (ctx) {
|
||||
END_TIME(&broadcast_timer, "Key broadcasting");
|
||||
}
|
||||
}
|
||||
}
|
||||
BEGIN_TIME(&compute_timer);
|
||||
@@ -383,24 +388,29 @@ void _dfr_start(int64_t use_dfr_p, void *ctx) {
|
||||
void _dfr_stop(int64_t use_dfr_p) {
|
||||
if (use_dfr_p) {
|
||||
if (num_nodes > 1) {
|
||||
// Non-root nodes synchronize here with the root to mark the point
|
||||
// where the root is free to send work out (only needed in JIT).
|
||||
if (!_dfr_is_root_node())
|
||||
_dfr_startup_barrier->wait();
|
||||
|
||||
// The barrier is only needed to synchronize the different
|
||||
// computation phases when the compute nodes need to generate and
|
||||
// register new work functions in each phase.
|
||||
_dfr_jit_phase_barrier->wait();
|
||||
|
||||
_dfr_node_level_runtime_context_manager->clearContext();
|
||||
_dfr_node_level_work_function_registry->clearRegistry();
|
||||
_dfr_jit_phase_barrier->wait();
|
||||
}
|
||||
}
|
||||
END_TIME(&compute_timer, "Compute");
|
||||
END_TIME(&whole_timer, "Total execution");
|
||||
}
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
namespace dfr {
|
||||
void _dfr_run_remote_scheduler() {
|
||||
_dfr_start(1, nullptr);
|
||||
_dfr_stop(1);
|
||||
}
|
||||
} // namespace dfr
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
void _dfr_try_initialize() {
|
||||
// Initialize and immediately suspend the HPX runtime if not yet done.
|
||||
uint64_t expected = uninitialised;
|
||||
@@ -484,6 +494,8 @@ bool _dfr_is_jit() { return is_jit_p; }
|
||||
bool _dfr_is_root_node() { return true; }
|
||||
bool _dfr_use_omp() { return use_omp_p; }
|
||||
bool _dfr_is_distributed() { return num_nodes > 1; }
|
||||
void _dfr_run_remote_scheduler() {}
|
||||
void _dfr_register_lib(void *dlh) {}
|
||||
|
||||
} // namespace dfr
|
||||
} // namespace concretelang
|
||||
@@ -491,8 +503,8 @@ bool _dfr_is_distributed() { return num_nodes > 1; }
|
||||
|
||||
using namespace mlir::concretelang::dfr;
|
||||
|
||||
void _dfr_start(int64_t use_dfr_p, void *ctx) { BEGIN_TIME(&compute_timer); }
|
||||
void _dfr_stop(int64_t use_dfr_p) { END_TIME(&compute_timer, "Compute"); }
|
||||
void _dfr_start(int64_t use_dfr_p, void *ctx) {}
|
||||
void _dfr_stop(int64_t use_dfr_p) {}
|
||||
|
||||
void _dfr_terminate() {}
|
||||
#endif
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#ifdef CONCRETELANG_CUDA_SUPPORT
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <cstdarg>
|
||||
@@ -18,15 +19,11 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <concretelang/Runtime/GPUDFG.hpp>
|
||||
#include <concretelang/Runtime/stream_emulator_api.h>
|
||||
#include <concretelang/Runtime/time_util.h>
|
||||
#include <concretelang/Runtime/wrappers.h>
|
||||
|
||||
#ifdef CONCRETELANG_CUDA_SUPPORT
|
||||
#include "bootstrap.h"
|
||||
#include "device.h"
|
||||
#include "keyswitch.h"
|
||||
#include "linear_algebra.h"
|
||||
|
||||
using RuntimeContext = mlir::concretelang::RuntimeContext;
|
||||
|
||||
namespace mlir {
|
||||
@@ -34,6 +31,10 @@ namespace concretelang {
|
||||
namespace gpu_dfg {
|
||||
namespace {
|
||||
|
||||
#if CONCRETELANG_TIMING_ENABLED
|
||||
static struct timespec init_timer, blocking_get_timer, acc1, acc2;
|
||||
#endif
|
||||
|
||||
using MemRef2 = MemRefDescriptor<2>;
|
||||
|
||||
// When not using all accelerators on the machine, we distribute work
|
||||
@@ -93,6 +94,7 @@ union Context {
|
||||
// across multiple locations.
|
||||
static const int32_t host_location = -1;
|
||||
static const int32_t split_location = -2;
|
||||
static const int32_t invalid_location = -3;
|
||||
// Similarly dependence chunks are either indexed (which does not
|
||||
// always correlate to the device index on which they are located) or
|
||||
// this dependence is split further.
|
||||
@@ -216,17 +218,12 @@ struct GPU_DFG {
|
||||
polynomial_size,
|
||||
input_lwe_ciphertext_count);
|
||||
}
|
||||
void free_streams() {
|
||||
streams.sort();
|
||||
streams.unique();
|
||||
for (auto s : streams)
|
||||
delete s;
|
||||
}
|
||||
inline void *get_gpu_stream(int32_t loc) {
|
||||
if (loc < 0)
|
||||
return nullptr;
|
||||
return gpus[loc].get_gpu_stream();
|
||||
}
|
||||
void free_streams();
|
||||
|
||||
private:
|
||||
std::list<void *> to_free_list;
|
||||
@@ -253,6 +250,7 @@ struct Dependence {
|
||||
int32_t chunk_id;
|
||||
size_t stream_generation;
|
||||
std::vector<Dependence *> chunks;
|
||||
std::vector<size_t> chunking_schedule;
|
||||
Dependence(int32_t l, MemRef2 hd, void *dd, bool ohr, bool alloc = false,
|
||||
int32_t chunk_id = single_chunk, size_t gen = 0)
|
||||
: location(l), host_data(hd), device_data(dd), onHostReady(ohr),
|
||||
@@ -266,17 +264,41 @@ struct Dependence {
|
||||
// Split a dependence into a number of chunks either to run on
|
||||
// multiple GPUs or execute concurrently on the host.
|
||||
void split_dependence(size_t num_chunks, size_t num_gpu_chunks,
|
||||
size_t chunk_dim, bool constant) {
|
||||
size_t chunk_dim, bool constant,
|
||||
size_t gpu_chunk_factor, GPU_DFG *dfg) {
|
||||
// If this dependence is already split, check that the split
|
||||
// matches the new request
|
||||
if (chunk_id == split_chunks) {
|
||||
if (num_chunks + num_gpu_chunks != chunks.size())
|
||||
warnx("WARNING: requesting to split dependence across different number "
|
||||
"of chunks (%lu) than it already is split (%lu) which would "
|
||||
"require remapping. This is not supported.",
|
||||
num_chunks + num_gpu_chunks, chunks.size());
|
||||
assert(num_chunks + num_gpu_chunks == chunks.size());
|
||||
return;
|
||||
if (num_chunks + num_gpu_chunks != chunks.size()) {
|
||||
// If this is not available on host, then we need to merge on
|
||||
// host and re-split
|
||||
if (!onHostReady) {
|
||||
size_t data_size = 0;
|
||||
size_t num_samples = 0;
|
||||
for (auto c : chunks) {
|
||||
move_chunk_off_device(c->chunk_id, dfg);
|
||||
data_size += memref_get_data_size(c->host_data);
|
||||
num_samples += c->host_data.sizes[chunk_dim];
|
||||
sdfg_gpu_debug_print_mref("Chunk", c->host_data);
|
||||
}
|
||||
host_data = chunks[0]->host_data;
|
||||
host_data.allocated = host_data.aligned =
|
||||
(uint64_t *)malloc(data_size);
|
||||
host_data.sizes[chunk_dim] = num_samples;
|
||||
size_t pos = 0;
|
||||
for (auto c : chunks) {
|
||||
memcpy(((char *)host_data.aligned) + pos, c->host_data.aligned,
|
||||
memref_get_data_size(c->host_data));
|
||||
pos += memref_get_data_size(c->host_data);
|
||||
}
|
||||
for (auto c : chunks)
|
||||
free_chunk_host_data(c->chunk_id, dfg);
|
||||
onHostReady = true;
|
||||
hostAllocated = true;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!chunks.empty()) {
|
||||
for (auto c : chunks)
|
||||
@@ -301,8 +323,8 @@ struct Dependence {
|
||||
return;
|
||||
}
|
||||
size_t chunk_size =
|
||||
num_samples / (num_chunks + num_gpu_chunks * device_compute_factor);
|
||||
size_t gpu_chunk_size = chunk_size * device_compute_factor;
|
||||
num_samples / (num_chunks + num_gpu_chunks * gpu_chunk_factor);
|
||||
size_t gpu_chunk_size = chunk_size * gpu_chunk_factor;
|
||||
chunk_size = (num_samples - gpu_chunk_size * num_gpu_chunks) / num_chunks;
|
||||
size_t chunk_remainder =
|
||||
(num_samples - gpu_chunk_size * num_gpu_chunks) % num_chunks;
|
||||
@@ -331,62 +353,43 @@ struct Dependence {
|
||||
chunk_id = split_chunks;
|
||||
location = split_location;
|
||||
}
|
||||
void merge_dependence(GPU_DFG *dfg) {
|
||||
void finalize_merged_dependence(GPU_DFG *dfg) {
|
||||
assert(!chunks.empty() && "Cannot merge dependence with no chunks");
|
||||
size_t data_size = 0;
|
||||
size_t num_samples = 0;
|
||||
for (auto c : chunks) {
|
||||
data_size += memref_get_data_size(c->host_data);
|
||||
num_samples += c->host_data.sizes[0];
|
||||
}
|
||||
uint64_t *data = (uint64_t *)malloc(data_size);
|
||||
MemRef2 output = {data,
|
||||
data,
|
||||
0,
|
||||
{num_samples, chunks.front()->host_data.sizes[1]},
|
||||
{chunks.front()->host_data.sizes[1], 1}};
|
||||
|
||||
std::list<cudaStream_t *> custreams_used;
|
||||
for (auto c : chunks) {
|
||||
// Write out the piece in the final target dependence
|
||||
size_t csize = memref_get_data_size(c->host_data);
|
||||
if (c->onHostReady) {
|
||||
memcpy(((char *)output.aligned) + output.offset, c->host_data.aligned,
|
||||
csize);
|
||||
} else {
|
||||
assert(c->location > host_location);
|
||||
cudaStream_t *s = (cudaStream_t *)dfg->get_gpu_stream(c->location);
|
||||
cuda_memcpy_async_to_cpu(((char *)output.aligned) + output.offset,
|
||||
c->device_data, csize, s, c->location);
|
||||
custreams_used.push_back(s);
|
||||
}
|
||||
output.offset += csize;
|
||||
}
|
||||
output.offset = 0;
|
||||
for (auto c : chunks)
|
||||
c->free_data(dfg, true);
|
||||
assert(host_data.allocated != nullptr);
|
||||
chunks.clear();
|
||||
|
||||
custreams_used.sort();
|
||||
custreams_used.unique();
|
||||
for (auto s : custreams_used)
|
||||
cudaStreamSynchronize(*s);
|
||||
|
||||
location = host_location;
|
||||
onHostReady = true;
|
||||
assert(host_data.allocated == nullptr);
|
||||
host_data = output;
|
||||
assert(device_data == nullptr);
|
||||
hostAllocated = true;
|
||||
chunk_id = single_chunk;
|
||||
}
|
||||
void copy_chunk_off_device(int32_t chunk_id, GPU_DFG *dfg) {
|
||||
if (chunks[chunk_id]->onHostReady)
|
||||
return;
|
||||
chunks[chunk_id]->copy(host_location, dfg, false);
|
||||
}
|
||||
void move_chunk_off_device(int32_t chunk_id, GPU_DFG *dfg) {
|
||||
chunks[chunk_id]->copy(host_location, dfg);
|
||||
copy_chunk_off_device(chunk_id, dfg);
|
||||
chunks[chunk_id]->location = host_location;
|
||||
if (chunks[chunk_id]->device_data == nullptr)
|
||||
return;
|
||||
cuda_drop_async(
|
||||
chunks[chunk_id]->device_data,
|
||||
(cudaStream_t *)dfg->get_gpu_stream(chunks[chunk_id]->location),
|
||||
chunks[chunk_id]->location);
|
||||
chunks[chunk_id]->location = host_location;
|
||||
chunks[chunk_id]->device_data = nullptr;
|
||||
}
|
||||
void merge_output_off_device(int32_t chunk_id, GPU_DFG *dfg) {
|
||||
assert(chunks[chunk_id]->location > host_location);
|
||||
size_t data_offset = 0;
|
||||
for (int32_t c = 0; c < chunk_id; ++c)
|
||||
data_offset +=
|
||||
chunking_schedule[c] * host_data.sizes[1] * sizeof(uint64_t);
|
||||
size_t csize = memref_get_data_size(chunks[chunk_id]->host_data);
|
||||
cudaStream_t *s =
|
||||
(cudaStream_t *)dfg->get_gpu_stream(chunks[chunk_id]->location);
|
||||
cuda_memcpy_async_to_cpu(((char *)host_data.aligned) + data_offset,
|
||||
chunks[chunk_id]->device_data, csize, s,
|
||||
chunks[chunk_id]->location);
|
||||
}
|
||||
void free_chunk_host_data(int32_t chunk_id, GPU_DFG *dfg) {
|
||||
assert(chunks[chunk_id]->location == host_location &&
|
||||
@@ -397,16 +400,18 @@ struct Dependence {
|
||||
chunks[chunk_id]->onHostReady = false;
|
||||
}
|
||||
void free_chunk_device_data(int32_t chunk_id, GPU_DFG *dfg) {
|
||||
assert(chunks[chunk_id]->location > host_location &&
|
||||
chunks[chunk_id]->device_data != nullptr);
|
||||
if (chunks[chunk_id]->device_data == nullptr)
|
||||
return;
|
||||
cuda_drop_async(
|
||||
chunks[chunk_id]->device_data,
|
||||
(cudaStream_t *)dfg->get_gpu_stream(chunks[chunk_id]->location),
|
||||
chunks[chunk_id]->location);
|
||||
chunks[chunk_id]->device_data = nullptr;
|
||||
chunks[chunk_id]->location =
|
||||
(chunks[chunk_id]->onHostReady) ? host_location : invalid_location;
|
||||
}
|
||||
inline void free_data(GPU_DFG *dfg, bool immediate = false) {
|
||||
if (location >= 0 && device_data != nullptr) {
|
||||
if (device_data != nullptr) {
|
||||
cuda_drop_async(device_data,
|
||||
(cudaStream_t *)dfg->get_gpu_stream(location), location);
|
||||
}
|
||||
@@ -414,17 +419,19 @@ struct Dependence {
|
||||
// As streams are not synchronized aside from the GET operation,
|
||||
// we cannot free host-side data until after the synchronization
|
||||
// point as it could still be used by an asynchronous operation.
|
||||
if (immediate)
|
||||
if (immediate) {
|
||||
free(host_data.allocated);
|
||||
else
|
||||
host_data.allocated = nullptr;
|
||||
} else {
|
||||
dfg->register_stream_order_dependent_allocation(host_data.allocated);
|
||||
}
|
||||
}
|
||||
for (auto c : chunks)
|
||||
c->free_data(dfg, immediate);
|
||||
chunks.clear();
|
||||
delete (this);
|
||||
}
|
||||
inline void copy(int32_t loc, GPU_DFG *dfg) {
|
||||
inline void copy(int32_t loc, GPU_DFG *dfg, bool synchronize = true) {
|
||||
size_t data_size = memref_get_data_size(host_data);
|
||||
if (loc == location)
|
||||
return;
|
||||
@@ -438,7 +445,8 @@ struct Dependence {
|
||||
cudaStream_t *s = (cudaStream_t *)dfg->get_gpu_stream(location);
|
||||
cuda_memcpy_async_to_cpu(host_data.aligned, device_data, data_size, s,
|
||||
location);
|
||||
cudaStreamSynchronize(*s);
|
||||
if (synchronize)
|
||||
cudaStreamSynchronize(*s);
|
||||
onHostReady = true;
|
||||
} else {
|
||||
assert(onHostReady &&
|
||||
@@ -508,6 +516,7 @@ struct Stream {
|
||||
bool ct_stream;
|
||||
bool pt_stream;
|
||||
size_t generation;
|
||||
std::atomic<size_t> uses = {0};
|
||||
const char *name;
|
||||
Stream(stream_type t, const char *sname = nullptr)
|
||||
: dep(nullptr), type(t), producer(nullptr), dfg(nullptr),
|
||||
@@ -524,7 +533,7 @@ struct Stream {
|
||||
}
|
||||
~Stream() {
|
||||
if (dep != nullptr)
|
||||
dep->free_data(dfg);
|
||||
dep->free_data(dfg, true);
|
||||
if (producer != nullptr)
|
||||
delete producer;
|
||||
}
|
||||
@@ -547,6 +556,7 @@ struct Stream {
|
||||
dep = d;
|
||||
}
|
||||
dep->stream_generation = generation;
|
||||
uses = 0;
|
||||
}
|
||||
// For a given dependence, traverse the DFG backwards to extract the lattice
|
||||
// of kernels required to execute to produce this data
|
||||
@@ -579,18 +589,22 @@ struct Stream {
|
||||
size_t subgraph_bootstraps = 0;
|
||||
for (auto p : queue) {
|
||||
is_batched_subgraph |= p->batched_process;
|
||||
subgraph_bootstraps +=
|
||||
(p->fun == memref_bootstrap_lwe_u64_process) ? 1 : 0;
|
||||
subgraph_bootstraps += (p->fun == memref_bootstrap_lwe_u64_process ||
|
||||
p->fun == memref_keyswitch_lwe_u64_process)
|
||||
? 1
|
||||
: 0;
|
||||
}
|
||||
// If this subgraph is not batched, then use this DFG's allocated
|
||||
// GPU to offload to. If this does not bootstrap, just execute on
|
||||
// the host.
|
||||
if (!is_batched_subgraph) {
|
||||
for (auto p : queue) {
|
||||
schedule_kernel(
|
||||
p, (subgraph_bootstraps > 0) ? dfg->gpu_idx : host_location,
|
||||
single_chunk, nullptr);
|
||||
}
|
||||
int32_t loc = (subgraph_bootstraps > 0) ? dfg->gpu_idx : host_location;
|
||||
for (auto p : queue)
|
||||
schedule_kernel(p, loc, single_chunk,
|
||||
(p == producer) ? out.aligned : nullptr);
|
||||
if (loc != host_location)
|
||||
dep->copy(host_location, dfg, true);
|
||||
dep->onHostReady = true;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -645,8 +659,6 @@ struct Stream {
|
||||
num_real_inputs++;
|
||||
if (s->dep->host_data.sizes[0] > num_samples)
|
||||
num_samples = s->dep->host_data.sizes[0];
|
||||
if (!s->dep->chunks.empty())
|
||||
num_samples = s->dep->chunks.size();
|
||||
} else {
|
||||
mem_per_sample += sizeof(uint64_t);
|
||||
}
|
||||
@@ -657,7 +669,7 @@ struct Stream {
|
||||
(num_real_inputs ? num_real_inputs : 1);
|
||||
size_t num_chunks = 1;
|
||||
size_t num_gpu_chunks = 0;
|
||||
int32_t num_devices_to_use = 0;
|
||||
size_t gpu_chunk_factor = device_compute_factor;
|
||||
// If the subgraph does not have sufficient computational
|
||||
// intensity (which we approximate by whether it bootstraps), then
|
||||
// we assume (TODO: confirm with profiling) that it is not
|
||||
@@ -681,16 +693,20 @@ struct Stream {
|
||||
(available_mem - const_mem_per_sample) /
|
||||
((mem_per_sample ? mem_per_sample : 1) * gpu_memory_inflation_factor);
|
||||
|
||||
if (num_samples < num_cores + device_compute_factor * num_devices) {
|
||||
num_devices_to_use = 0;
|
||||
while (gpu_chunk_factor > 4) {
|
||||
if (num_samples < num_cores + gpu_chunk_factor * num_devices)
|
||||
gpu_chunk_factor >>= 1;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
if (num_samples < num_cores + gpu_chunk_factor * num_devices) {
|
||||
num_chunks = std::min(num_cores, num_samples);
|
||||
} else {
|
||||
num_devices_to_use = num_devices;
|
||||
size_t compute_resources =
|
||||
num_cores + num_devices * device_compute_factor;
|
||||
size_t compute_resources = num_cores + num_devices * gpu_chunk_factor;
|
||||
size_t gpu_chunk_size =
|
||||
std::ceil((double)num_samples / compute_resources) *
|
||||
device_compute_factor;
|
||||
gpu_chunk_factor;
|
||||
size_t scale_factor =
|
||||
std::ceil((double)gpu_chunk_size / max_samples_per_chunk);
|
||||
num_chunks = num_cores * scale_factor;
|
||||
@@ -702,7 +718,8 @@ struct Stream {
|
||||
|
||||
for (auto i : inputs)
|
||||
i->dep->split_dependence(num_chunks, num_gpu_chunks,
|
||||
(i->ct_stream) ? 0 : 1, i->const_stream);
|
||||
(i->ct_stream) ? 0 : 1, i->const_stream,
|
||||
gpu_chunk_factor, dfg);
|
||||
for (auto iv : intermediate_values) {
|
||||
if (iv->need_new_gen()) {
|
||||
iv->put(new Dependence(split_location,
|
||||
@@ -712,21 +729,67 @@ struct Stream {
|
||||
}
|
||||
}
|
||||
for (auto o : outputs) {
|
||||
if (o->need_new_gen()) {
|
||||
o->put(new Dependence(split_location,
|
||||
{nullptr, nullptr, 0, {0, 0}, {0, 0}}, nullptr,
|
||||
false, false, split_chunks));
|
||||
o->dep->chunks.resize(num_chunks + num_gpu_chunks, nullptr);
|
||||
if (!o->need_new_gen())
|
||||
continue;
|
||||
std::function<uint64_t(Stream *)> get_output_size =
|
||||
[&](Stream *s) -> uint64_t {
|
||||
uint64_t res = 0;
|
||||
// If this stream is not produced within SDFG, we could use
|
||||
// the input size. For now return 0.
|
||||
if (s->producer == nullptr)
|
||||
return 0;
|
||||
// If the producer process has an output size registered,
|
||||
// return it.
|
||||
if (s->producer->output_size.val > 0)
|
||||
return s->producer->output_size.val;
|
||||
// Finally we look for sizes from inputs to the producer if
|
||||
// we don't have it registered as poly size does not change
|
||||
// in operators that do not register size.
|
||||
for (auto p : s->producer->input_streams) {
|
||||
uint64_t p_size = get_output_size(p);
|
||||
if (p_size == 0)
|
||||
continue;
|
||||
if (res == 0)
|
||||
res = get_output_size(p);
|
||||
else
|
||||
assert(res == p_size);
|
||||
}
|
||||
return res;
|
||||
};
|
||||
MemRef2 out_mref;
|
||||
bool allocated = false;
|
||||
if (o == this) {
|
||||
out_mref = out;
|
||||
} else {
|
||||
uint64_t output_size = get_output_size(o);
|
||||
out_mref = {0, 0, 0, {num_samples, output_size}, {output_size, 1}};
|
||||
size_t data_size = memref_get_data_size(out_mref);
|
||||
out_mref.allocated = out_mref.aligned = (uint64_t *)malloc(data_size);
|
||||
allocated = true;
|
||||
}
|
||||
|
||||
o->put(new Dependence(split_location, out_mref, nullptr, false, allocated,
|
||||
split_chunks));
|
||||
o->dep->chunks.resize(num_chunks + num_gpu_chunks, nullptr);
|
||||
}
|
||||
for (auto o : outputs) {
|
||||
o->dep->chunking_schedule.clear();
|
||||
for (auto i : inputs) {
|
||||
size_t cdim = (i->ct_stream) ? 0 : 1;
|
||||
if (i->dep->host_data.sizes[cdim] == num_samples) {
|
||||
for (auto c : i->dep->chunks)
|
||||
o->dep->chunking_schedule.push_back(c->host_data.sizes[cdim]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute graph
|
||||
std::list<std::thread> workers;
|
||||
std::list<std::thread> gpu_schedulers;
|
||||
std::vector<std::list<size_t>> gpu_chunk_list;
|
||||
gpu_chunk_list.resize(num_devices);
|
||||
int32_t dev = 0;
|
||||
for (size_t c = 0; c < num_chunks + num_gpu_chunks; ++c) {
|
||||
for (int c = num_chunks + num_gpu_chunks - 1; c >= 0; --c) {
|
||||
if (!subgraph_bootstraps) {
|
||||
workers.push_back(std::thread(
|
||||
[&](std::list<Process *> queue, size_t c, int32_t host_location) {
|
||||
@@ -742,7 +805,21 @@ struct Stream {
|
||||
workers.push_back(std::thread(
|
||||
[&](std::list<Process *> queue, size_t c, int32_t host_location) {
|
||||
for (auto p : queue) {
|
||||
schedule_kernel(p, host_location, c, nullptr);
|
||||
Stream *os = p->output_streams[0];
|
||||
auto it = std::find(outputs.begin(), outputs.end(), os);
|
||||
if (it == outputs.end()) {
|
||||
schedule_kernel(p, host_location, c, nullptr);
|
||||
} else {
|
||||
size_t data_offset = 0;
|
||||
for (int32_t ch = 0; ch < c; ++ch)
|
||||
data_offset +=
|
||||
outputs.front()->dep->chunking_schedule[ch] *
|
||||
os->dep->host_data.sizes[1] * sizeof(uint64_t);
|
||||
schedule_kernel(
|
||||
p, host_location, c,
|
||||
(uint64_t *)(((char *)os->dep->host_data.aligned) +
|
||||
data_offset));
|
||||
}
|
||||
}
|
||||
for (auto iv : intermediate_values)
|
||||
if (iv->consumers.size() == 1)
|
||||
@@ -765,15 +842,35 @@ struct Stream {
|
||||
assert(status == cudaSuccess);
|
||||
cudaMemGetInfo(&gpu_free_mem, &gpu_total_mem);
|
||||
assert(status == cudaSuccess);
|
||||
for (auto p : queue)
|
||||
for (auto p : queue) {
|
||||
schedule_kernel(p, dev, c, nullptr);
|
||||
for (auto out_str : p->output_streams) {
|
||||
// For all output streams, if this is an output,
|
||||
// schedule copy out of the data produced by this
|
||||
// process.
|
||||
if (auto it =
|
||||
std::find(outputs.begin(), outputs.end(), out_str);
|
||||
it != outputs.end()) {
|
||||
out_str->dep->merge_output_off_device(c, dfg);
|
||||
continue;
|
||||
}
|
||||
// If this is not an output, but some process is not
|
||||
// part of this subgraph, we need to copy the data
|
||||
// out.
|
||||
for (auto cons_proc : out_str->consumers)
|
||||
if (auto it =
|
||||
std::find(queue.begin(), queue.end(), cons_proc);
|
||||
it == queue.end())
|
||||
out_str->dep->copy_chunk_off_device(c, dfg);
|
||||
}
|
||||
}
|
||||
for (auto i : inputs)
|
||||
if (++i->uses == i->consumers.size())
|
||||
i->dep->free_chunk_device_data(c, dfg);
|
||||
for (auto iv : intermediate_values)
|
||||
if (iv->consumers.size() > 1)
|
||||
iv->dep->move_chunk_off_device(c, dfg);
|
||||
else
|
||||
iv->dep->free_chunk_device_data(c, dfg);
|
||||
iv->dep->free_chunk_device_data(c, dfg);
|
||||
for (auto o : outputs)
|
||||
o->dep->move_chunk_off_device(c, dfg);
|
||||
o->dep->free_chunk_device_data(c, dfg);
|
||||
cudaStreamSynchronize(*(cudaStream_t *)dfg->get_gpu_stream(dev));
|
||||
}
|
||||
},
|
||||
@@ -786,65 +883,43 @@ struct Stream {
|
||||
gs.join();
|
||||
gpu_schedulers.clear();
|
||||
// Build output out of the separate chunks processed
|
||||
for (auto o : outputs) {
|
||||
assert(o->batched_stream && o->ct_stream &&
|
||||
"Only operations with ciphertext output supported.");
|
||||
o->dep->merge_dependence(dfg);
|
||||
}
|
||||
for (auto o : outputs)
|
||||
o->dep->finalize_merged_dependence(dfg);
|
||||
for (dev = 0; dev < num_devices; ++dev)
|
||||
cudaStreamSynchronize(*(cudaStream_t *)dfg->get_gpu_stream(dev));
|
||||
// We will assume that only one subgraph is being processed per
|
||||
// DFG at a time, so we can safely free these here.
|
||||
dfg->free_stream_order_dependent_data();
|
||||
return;
|
||||
}
|
||||
Dependence *get_on_host(MemRef2 &out) {
|
||||
void get_on_host(MemRef2 &out) {
|
||||
// Schedule the execution of the SDFG subgraph required to compute
|
||||
// the value requested
|
||||
schedule_work(out);
|
||||
assert(dep != nullptr && "GET on empty stream not allowed.");
|
||||
// If this was already copied to host, copy out
|
||||
if (dep->onHostReady) {
|
||||
memref_copy_contiguous(out, dep->host_data);
|
||||
return dep;
|
||||
} else if (dep->location == split_location) {
|
||||
char *pos = (char *)(out.aligned + out.offset);
|
||||
std::list<int32_t> devices_used;
|
||||
for (auto c : dep->chunks) {
|
||||
size_t data_size = memref_get_data_size(c->host_data);
|
||||
cuda_memcpy_async_to_cpu(
|
||||
pos, c->device_data, data_size,
|
||||
(cudaStream_t *)dfg->get_gpu_stream(c->location), c->location);
|
||||
pos += data_size;
|
||||
devices_used.push_back(c->location);
|
||||
}
|
||||
// We should only synchronize devices that had data chunks
|
||||
devices_used.sort();
|
||||
devices_used.unique();
|
||||
for (auto i : devices_used)
|
||||
cudaStreamSynchronize(*(cudaStream_t *)dfg->get_gpu_stream(i));
|
||||
} else {
|
||||
size_t data_size = memref_get_data_size(dep->host_data);
|
||||
cuda_memcpy_async_to_cpu(out.aligned + out.offset, dep->device_data,
|
||||
data_size, (cudaStream_t *)dfg->gpu_stream,
|
||||
dep->location);
|
||||
cudaStreamSynchronize(*(cudaStream_t *)dfg->gpu_stream);
|
||||
}
|
||||
// After this synchronization point, all of the host-side
|
||||
// allocated memory can be freed as we know all asynchronous
|
||||
// operations have finished.
|
||||
dfg->free_stream_order_dependent_data();
|
||||
|
||||
// The result should already have been copied to host, so there is
// nothing to do (transfers to host are already synchronized in
// schedule_work).
|
||||
assert(dep != nullptr && dep->onHostReady);
|
||||
|
||||
// For now we make a copy of this dependence for future use as we
|
||||
// can't assume that the output location will remain live until
|
||||
// the next use.
|
||||
// TODO: eliminate this copy.
|
||||
if (!dep->hostAllocated)
|
||||
dep->host_data = memref_copy_alloc(out);
|
||||
dep->onHostReady = true;
|
||||
dep->hostAllocated = true;
|
||||
return dep;
|
||||
}
|
||||
Dependence *get(int32_t location, int32_t chunk_id = single_chunk) {
|
||||
assert(dep != nullptr && "Dependence could not be computed.");
|
||||
assert(chunk_id != split_chunks);
|
||||
if (chunk_id != single_chunk) {
|
||||
Dependence *d = dep->chunks[chunk_id];
|
||||
d->copy(location, dfg);
|
||||
d->copy(location, dfg, false);
|
||||
return d;
|
||||
}
|
||||
dep->copy(location, dfg);
|
||||
dep->copy(location, dfg, false);
|
||||
return dep;
|
||||
}
|
||||
inline bool need_new_gen(int32_t chunk_id = single_chunk) {
|
||||
@@ -863,6 +938,13 @@ struct Stream {
|
||||
}
|
||||
};
|
||||
|
||||
void GPU_DFG::free_streams() {
streams.sort();
streams.unique();
for (auto s : streams)
delete s;
}
|
||||
|
||||
static inline mlir::concretelang::gpu_dfg::Process *
|
||||
make_process_1_1(void *dfg, void *sin1, void *sout,
|
||||
void (*fun)(Process *, int32_t, int32_t, uint64_t *)) {
|
||||
@@ -882,6 +964,7 @@ make_process_1_1(void *dfg, void *sin1, void *sout,
|
||||
p->dfg->register_stream(s1);
|
||||
p->dfg->register_stream(so);
|
||||
p->batched_process = s1->batched_stream;
|
||||
p->output_size.val = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -909,6 +992,7 @@ make_process_2_1(void *dfg, void *sin1, void *sin2, void *sout,
|
||||
p->dfg->register_stream(s2);
|
||||
p->dfg->register_stream(so);
|
||||
p->batched_process = s1->batched_stream;
|
||||
p->output_size.val = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -957,8 +1041,11 @@ void memref_keyswitch_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
|
||||
uint64_t *out_ptr) {
|
||||
auto sched = [&](Dependence *d) {
|
||||
uint64_t num_samples = d->host_data.sizes[0];
|
||||
MemRef2 out = {
|
||||
0, 0, 0, {num_samples, p->output_size.val}, {p->output_size.val, 1}};
|
||||
MemRef2 out = {out_ptr,
|
||||
out_ptr,
|
||||
0,
|
||||
{num_samples, p->output_size.val},
|
||||
{p->output_size.val, 1}};
|
||||
size_t data_size = memref_get_data_size(out);
|
||||
if (loc == host_location) {
|
||||
// If it is not profitable to offload, schedule kernel on CPU
|
||||
@@ -972,8 +1059,8 @@ void memref_keyswitch_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
|
||||
d->host_data.strides[1], p->level.val, p->base_log.val,
|
||||
p->input_lwe_dim.val, p->output_lwe_dim.val, p->sk_index.val,
|
||||
p->ctx.val);
|
||||
Dependence *dep =
|
||||
new Dependence(loc, out, nullptr, true, true, d->chunk_id);
|
||||
Dependence *dep = new Dependence(loc, out, nullptr, true,
|
||||
(out_ptr == nullptr), d->chunk_id);
|
||||
return dep;
|
||||
} else {
|
||||
// Schedule the keyswitch kernel on the GPU
|
||||
@@ -999,7 +1086,8 @@ void memref_keyswitch_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
|
||||
void memref_bootstrap_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
|
||||
uint64_t *out_ptr) {
|
||||
assert(p->output_size.val == p->glwe_dim.val * p->poly_size.val + 1);
|
||||
|
||||
if (!p->output_streams[0]->need_new_gen(chunk_id))
|
||||
return;
|
||||
Dependence *idep1 = p->input_streams[1]->get(host_location, chunk_id);
|
||||
MemRef2 &mtlu = idep1->host_data;
|
||||
uint32_t num_lut_vectors = mtlu.sizes[0];
|
||||
@@ -1023,8 +1111,11 @@ void memref_bootstrap_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
|
||||
std::vector<size_t> &lut_indexes, cudaStream_t *s,
|
||||
int32_t loc) {
|
||||
uint64_t num_samples = d0->host_data.sizes[0];
|
||||
MemRef2 out = {
|
||||
0, 0, 0, {num_samples, p->output_size.val}, {p->output_size.val, 1}};
|
||||
MemRef2 out = {out_ptr,
|
||||
out_ptr,
|
||||
0,
|
||||
{num_samples, p->output_size.val},
|
||||
{p->output_size.val, 1}};
|
||||
size_t data_size = memref_get_data_size(out);
|
||||
|
||||
// Move test vector indexes to the GPU, the test vector indexes is set of 0
|
||||
@@ -1065,8 +1156,8 @@ void memref_bootstrap_lwe_u64_process(Process *p, int32_t loc, int32_t chunk_id,
|
||||
d1->host_data.strides[1], p->input_lwe_dim.val, p->poly_size.val,
|
||||
p->level.val, p->base_log.val, p->glwe_dim.val, p->sk_index.val,
|
||||
p->ctx.val);
|
||||
Dependence *dep =
|
||||
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
|
||||
Dependence *dep = new Dependence(loc, out, nullptr, true,
|
||||
(out_ptr == nullptr), d0->chunk_id);
|
||||
free(glwe_ct);
|
||||
return dep;
|
||||
} else {
|
||||
@@ -1129,8 +1220,8 @@ void memref_add_lwe_ciphertexts_u64_process(Process *p, int32_t loc,
|
||||
assert(d0->host_data.sizes[1] == d1->host_data.sizes[1]);
|
||||
assert(d0->chunk_id == d1->chunk_id);
|
||||
uint64_t num_samples = d0->host_data.sizes[0];
|
||||
MemRef2 out = {0,
|
||||
0,
|
||||
MemRef2 out = {out_ptr,
|
||||
out_ptr,
|
||||
0,
|
||||
{num_samples, d0->host_data.sizes[1]},
|
||||
{d0->host_data.sizes[1], 1}};
|
||||
@@ -1148,8 +1239,8 @@ void memref_add_lwe_ciphertexts_u64_process(Process *p, int32_t loc,
|
||||
d1->host_data.aligned, d1->host_data.offset, d1->host_data.sizes[0],
|
||||
d1->host_data.sizes[1], d1->host_data.strides[0],
|
||||
d1->host_data.strides[1]);
|
||||
Dependence *dep =
|
||||
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
|
||||
Dependence *dep = new Dependence(loc, out, nullptr, true,
|
||||
(out_ptr == nullptr), d0->chunk_id);
|
||||
return dep;
|
||||
} else {
|
||||
// Schedule the kernel on the GPU
|
||||
@@ -1179,8 +1270,8 @@ void memref_add_plaintext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
|
||||
d1->host_data.sizes[1] == 1);
|
||||
assert(d0->chunk_id == d1->chunk_id);
|
||||
uint64_t num_samples = d0->host_data.sizes[0];
|
||||
MemRef2 out = {0,
|
||||
0,
|
||||
MemRef2 out = {out_ptr,
|
||||
out_ptr,
|
||||
0,
|
||||
{num_samples, d0->host_data.sizes[1]},
|
||||
{d0->host_data.sizes[1], 1}};
|
||||
@@ -1206,8 +1297,8 @@ void memref_add_plaintext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
|
||||
d0->host_data.strides[1], d1->host_data.allocated,
|
||||
d1->host_data.aligned, d1->host_data.offset, d1->host_data.sizes[1],
|
||||
d1->host_data.strides[1]);
|
||||
Dependence *dep =
|
||||
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
|
||||
Dependence *dep = new Dependence(loc, out, nullptr, true,
|
||||
(out_ptr == nullptr), d0->chunk_id);
|
||||
return dep;
|
||||
} else {
|
||||
// Schedule the kernel on the GPU
|
||||
@@ -1237,8 +1328,8 @@ void memref_mul_cleartext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
|
||||
d1->host_data.sizes[1] == 1);
|
||||
assert(d0->chunk_id == d1->chunk_id);
|
||||
uint64_t num_samples = d0->host_data.sizes[0];
|
||||
MemRef2 out = {0,
|
||||
0,
|
||||
MemRef2 out = {out_ptr,
|
||||
out_ptr,
|
||||
0,
|
||||
{num_samples, d0->host_data.sizes[1]},
|
||||
{d0->host_data.sizes[1], 1}};
|
||||
@@ -1264,8 +1355,8 @@ void memref_mul_cleartext_lwe_ciphertext_u64_process(Process *p, int32_t loc,
|
||||
d0->host_data.strides[1], d1->host_data.allocated,
|
||||
d1->host_data.aligned, d1->host_data.offset, d1->host_data.sizes[1],
|
||||
d1->host_data.strides[1]);
|
||||
Dependence *dep =
|
||||
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
|
||||
Dependence *dep = new Dependence(loc, out, nullptr, true,
|
||||
(out_ptr == nullptr), d0->chunk_id);
|
||||
return dep;
|
||||
} else {
|
||||
// Schedule the keyswitch kernel on the GPU
|
||||
@@ -1291,8 +1382,8 @@ void memref_negate_lwe_ciphertext_u64_process(Process *p, int32_t loc,
|
||||
uint64_t *out_ptr) {
|
||||
auto sched = [&](Dependence *d0, cudaStream_t *s, int32_t loc) {
|
||||
uint64_t num_samples = d0->host_data.sizes[0];
|
||||
MemRef2 out = {0,
|
||||
0,
|
||||
MemRef2 out = {out_ptr,
|
||||
out_ptr,
|
||||
0,
|
||||
{num_samples, d0->host_data.sizes[1]},
|
||||
{d0->host_data.sizes[1], 1}};
|
||||
@@ -1307,8 +1398,8 @@ void memref_negate_lwe_ciphertext_u64_process(Process *p, int32_t loc,
|
||||
d0->host_data.aligned, d0->host_data.offset, d0->host_data.sizes[0],
|
||||
d0->host_data.sizes[1], d0->host_data.strides[0],
|
||||
d0->host_data.strides[1]);
|
||||
Dependence *dep =
|
||||
new Dependence(loc, out, nullptr, true, true, d0->chunk_id);
|
||||
Dependence *dep = new Dependence(loc, out, nullptr, true,
|
||||
(out_ptr == nullptr), d0->chunk_id);
|
||||
return dep;
|
||||
} else {
|
||||
// Schedule the kernel on the GPU
|
||||
@@ -1544,12 +1635,14 @@ void *stream_emulator_make_memref_stream(const char *name, stream_type stype) {
|
||||
}
|
||||
void stream_emulator_put_memref(void *stream, uint64_t *allocated,
|
||||
uint64_t *aligned, uint64_t offset,
|
||||
uint64_t size, uint64_t stride) {
|
||||
uint64_t size, uint64_t stride,
|
||||
uint64_t data_ownership = 0) {
|
||||
assert(stride == 1 && "Strided memrefs not supported");
|
||||
Stream *s = (Stream *)stream;
|
||||
MemRef2 m = {allocated, aligned, offset, {1, size}, {size, stride}};
|
||||
Dependence *dep =
|
||||
new Dependence(host_location, memref_copy_alloc(m), nullptr, true, true);
|
||||
new Dependence(host_location, (data_ownership) ? m : memref_copy_alloc(m),
|
||||
nullptr, true, true);
|
||||
s->put(dep);
|
||||
s->generation++;
|
||||
}
|
||||
@@ -1573,12 +1666,14 @@ void *stream_emulator_make_memref_batch_stream(const char *name,
|
||||
void stream_emulator_put_memref_batch(void *stream, uint64_t *allocated,
|
||||
uint64_t *aligned, uint64_t offset,
|
||||
uint64_t size0, uint64_t size1,
|
||||
uint64_t stride0, uint64_t stride1) {
|
||||
uint64_t stride0, uint64_t stride1,
|
||||
uint64_t data_ownership = 0) {
|
||||
assert(stride1 == 1 && "Strided memrefs not supported");
|
||||
Stream *s = (Stream *)stream;
|
||||
MemRef2 m = {allocated, aligned, offset, {size0, size1}, {stride0, stride1}};
|
||||
Dependence *dep =
|
||||
new Dependence(host_location, memref_copy_alloc(m), nullptr, true, true);
|
||||
new Dependence(host_location, (data_ownership) ? m : memref_copy_alloc(m),
|
||||
nullptr, true, true);
|
||||
s->put(dep);
|
||||
s->generation++;
|
||||
}
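The new `data_ownership` argument decides who owns the buffer handed to the stream: when it is zero the previous behaviour is kept and the memref is copied, and when it is non-zero the new `Dependence` wraps the caller's buffer directly, so the caller should treat it as relinquished. A minimal caller-side sketch, illustrative only and not part of this diff (the wrapper function and sizes are hypothetical; the declaration is repeated from the diff above with the default argument elided):

#include <cstdint>
#include <cstdlib>

// Declaration from the diff above (default argument elided).
void stream_emulator_put_memref_batch(void *stream, uint64_t *allocated,
                                      uint64_t *aligned, uint64_t offset,
                                      uint64_t size0, uint64_t size1,
                                      uint64_t stride0, uint64_t stride1,
                                      uint64_t data_ownership);

// Hypothetical caller: hand a batch of `size0` rows of `size1` words to
// the stream without an extra copy by transferring ownership.
void put_batch_without_copy(void *stream, uint64_t size0, uint64_t size1) {
  uint64_t *batch = static_cast<uint64_t *>(
      std::malloc(size0 * size1 * sizeof(uint64_t)));
  // ... fill `batch` with the encrypted samples ...

  // data_ownership != 0: the stream adopts `batch` as-is; do not free or
  // reuse it afterwards. Passing 0 instead keeps the old copying
  // behaviour and leaves the buffer with the caller.
  stream_emulator_put_memref_batch(stream, batch, batch, /*offset=*/0, size0,
                                   size1, /*stride0=*/size1, /*stride1=*/1,
                                   /*data_ownership=*/1);
}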
|
||||
@@ -1587,6 +1682,10 @@ void stream_emulator_get_memref_batch(void *stream, uint64_t *out_allocated,
|
||||
uint64_t out_offset, uint64_t out_size0,
|
||||
uint64_t out_size1, uint64_t out_stride0,
|
||||
uint64_t out_stride1) {
|
||||
static size_t count = 0;
|
||||
END_TIME_C_ACC(&blocking_get_timer, "Non-GPU section execution", count,
|
||||
&acc1);
|
||||
BEGIN_TIME(&blocking_get_timer);
|
||||
assert(out_stride1 == 1 && "Strided memrefs not supported");
|
||||
MemRef2 mref = {out_allocated,
|
||||
out_aligned,
|
||||
@@ -1595,9 +1694,13 @@ void stream_emulator_get_memref_batch(void *stream, uint64_t *out_allocated,
|
||||
{out_stride0, out_stride1}};
|
||||
auto s = (Stream *)stream;
|
||||
s->get_on_host(mref);
|
||||
END_TIME_C_ACC(&blocking_get_timer, "GPU section execution", count++, &acc2);
|
||||
BEGIN_TIME(&blocking_get_timer);
|
||||
}
|
||||
|
||||
void *stream_emulator_init() {
|
||||
CONCRETELANG_ENABLE_TIMING();
|
||||
BEGIN_TIME(&init_timer);
|
||||
int num;
|
||||
assert(cudaGetDeviceCount(&num) == cudaSuccess);
|
||||
num_devices = num;
|
||||
@@ -1646,9 +1749,42 @@ void *stream_emulator_init() {
|
||||
if (num_cores < 1)
|
||||
num_cores = 1;
|
||||
|
||||
END_TIME(&init_timer, "Initialization of the SDFG runtime");
|
||||
BEGIN_TIME(&init_timer);
|
||||
|
||||
int device = next_device.fetch_add(1) % num_devices;
|
||||
return new GPU_DFG(device);
|
||||
}
|
||||
void stream_emulator_run(void *dfg) {}
|
||||
void stream_emulator_run(void *dfg) {
|
||||
END_TIME(&init_timer, "Building the SDFG graph");
|
||||
BEGIN_TIME(&blocking_get_timer);
|
||||
}
|
||||
void stream_emulator_delete(void *dfg) { delete (GPU_DFG *)dfg; }
|
||||
#endif
|
||||
|
||||
namespace mlir {
namespace concretelang {
namespace gpu_dfg {

bool check_cuda_device_available() {
#ifdef CONCRETELANG_CUDA_SUPPORT
int num;
if (cudaGetDeviceCount(&num) != cudaSuccess)
return false;
return num > 0;
#else
return false;
#endif
}

bool check_cuda_runtime_enabled() {
#ifdef CONCRETELANG_CUDA_SUPPORT
return true;
#else
return false;
#endif
}

} // namespace gpu_dfg
} // namespace concretelang
} // namespace mlir
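These two helpers keep two questions separate: was this build compiled with CUDA support, and is a usable device actually present at run time. A condensed sketch of how a caller can combine them before enabling offloading, illustrative only (the wrapper function and messages are hypothetical, and it assumes, as CompilerEngine.cpp does later in this commit, that GPUDFG.hpp declares the two checks):

#include <concretelang/Runtime/GPUDFG.hpp>
#include <cstdio>

// Hypothetical helper: returns true only when GPU offloading can work,
// otherwise explains why and lets the caller fall back to the CPU path.
static bool gpu_offload_usable() {
  using namespace mlir::concretelang::gpu_dfg;
  if (!check_cuda_runtime_enabled()) {
    std::fprintf(stderr, "Compiler built without CUDA support; "
                         "continuing on CPU.\n");
    return false;
  }
  if (!check_cuda_device_available()) {
    std::fprintf(stderr, "No usable CUDA device detected; "
                         "continuing on CPU.\n");
    return false;
  }
  return true;
}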
|
||||
|
||||
@@ -41,14 +41,17 @@ RuntimeContext::RuntimeContext(ServerKeyset serverKeyset)
|
||||
}
|
||||
|
||||
#ifdef CONCRETELANG_CUDA_SUPPORT
|
||||
assert(cudaGetDeviceCount(&num_devices) == cudaSuccess);
|
||||
bsk_gpu.resize(num_devices);
|
||||
ksk_gpu.resize(num_devices);
|
||||
for (int i = 0; i < num_devices; ++i) {
|
||||
bsk_gpu[i].resize(serverKeyset.lweBootstrapKeys.size(), nullptr);
|
||||
ksk_gpu[i].resize(serverKeyset.lweKeyswitchKeys.size(), nullptr);
|
||||
bsk_gpu_mutex.push_back(std::make_unique<std::mutex>());
|
||||
ksk_gpu_mutex.push_back(std::make_unique<std::mutex>());
|
||||
if (cudaGetDeviceCount(&num_devices) == cudaSuccess) {
|
||||
bsk_gpu.resize(num_devices);
|
||||
ksk_gpu.resize(num_devices);
|
||||
for (int i = 0; i < num_devices; ++i) {
|
||||
bsk_gpu[i].resize(serverKeyset.lweBootstrapKeys.size(), nullptr);
|
||||
ksk_gpu[i].resize(serverKeyset.lweKeyswitchKeys.size(), nullptr);
|
||||
bsk_gpu_mutex.push_back(std::make_unique<std::mutex>());
|
||||
ksk_gpu_mutex.push_back(std::make_unique<std::mutex>());
|
||||
}
|
||||
} else {
|
||||
num_devices = 0;
|
||||
}
|
||||
#endif
|
||||
}
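The constructor now degrades gracefully when no CUDA driver is present: the per-device bootstrap and keyswitch key caches are only sized when devices can be enumerated, and `num_devices` drops to zero otherwise instead of tripping an assertion. A standalone sketch of the same pattern, illustrative only (`KeyCache` and the struct around it are hypothetical placeholders):

#include <cuda_runtime.h>
#include <memory>
#include <mutex>
#include <vector>

// Hypothetical placeholder for a per-device, lazily filled key cache.
struct KeyCache {};

struct PerDeviceKeyCaches {
  int num_devices = 0;
  std::vector<std::vector<KeyCache *>> caches;
  std::vector<std::unique_ptr<std::mutex>> locks;

  explicit PerDeviceKeyCaches(size_t num_keys) {
    // Size the caches only if devices can be enumerated; otherwise stay
    // CPU-only with zero devices, mirroring the constructor above.
    if (cudaGetDeviceCount(&num_devices) != cudaSuccess) {
      num_devices = 0;
      return;
    }
    caches.resize(num_devices);
    for (int i = 0; i < num_devices; ++i) {
      caches[i].resize(num_keys, nullptr);
      locks.push_back(std::make_unique<std::mutex>());
    }
  }
};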
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#include "concretelang/Runtime/time_util.h"
|
||||
|
||||
#if CONCRETELANG_TIMING_ENABLED
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
namespace time_util {
|
||||
|
||||
bool timing_enabled = false;
|
||||
struct timespec timestamp;
|
||||
|
||||
} // namespace time_util
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
#endif
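This new translation unit only provides storage for the timing globals; the BEGIN_TIME/END_TIME/END_TIME_C_ACC macros used at the call sites earlier in this commit live in concretelang/Runtime/time_util.h, whose definitions are not shown here. Purely as an illustration of the begin/end/accumulate pattern those call sites follow, and not the project's actual macros:

#include <chrono>
#include <cstdio>

// Illustrative stand-in for the phase-timing pattern: start a timer,
// report the elapsed time under a phase name, and optionally accumulate
// it so repeated sections (e.g. per blocking get) can be summed.
struct PhaseTimer {
  std::chrono::steady_clock::time_point start;

  void begin() { start = std::chrono::steady_clock::now(); }

  void end(const char *phase, double *accumulator = nullptr) {
    const double ms = std::chrono::duration<double, std::milli>(
                          std::chrono::steady_clock::now() - start)
                          .count();
    if (accumulator != nullptr)
      *accumulator += ms;
    std::fprintf(stderr, "%s: %.3f ms\n", phase, ms);
  }
};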
|
||||
15
compilers/concrete-compiler/compiler/lib/Runtime/utils.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#include "concretelang/Runtime/utils.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
void LLVMInitializeNativeTarget() {
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
}
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
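A minimal usage sketch: the helper bundles the two LLVM native-target initializations so callers have a single entry point, and the Pipeline.cpp change later in this commit swaps the direct llvm::InitializeNativeTarget()/InitializeNativeTargetAsmPrinter() calls for it before translating the LLVM dialect to LLVM IR (the wrapper function below is hypothetical):

#include "concretelang/Runtime/utils.h"

// Hypothetical caller: make sure the native code generation target is
// registered once before emitting LLVM IR for the host.
void prepare_native_codegen() {
  mlir::concretelang::LLVMInitializeNativeTarget();
}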
|
||||
@@ -112,7 +112,6 @@ void memref_batched_keyswitch_lwe_cuda_u64(
|
||||
uint64_t ct0_stride0, uint64_t ct0_stride1, uint32_t level,
|
||||
uint32_t base_log, uint32_t input_lwe_dim, uint32_t output_lwe_dim,
|
||||
uint32_t ksk_index, mlir::concretelang::RuntimeContext *context) {
|
||||
assert(ksk_index == 0 && "multiple ksk is not yet implemented on GPU");
|
||||
assert(out_size0 == ct0_size0);
|
||||
assert(out_size1 == output_lwe_dim + 1);
|
||||
assert(ct0_size1 == input_lwe_dim + 1);
|
||||
@@ -159,7 +158,6 @@ void memref_batched_bootstrap_lwe_cuda_u64(
|
||||
uint64_t tlu_stride, uint32_t input_lwe_dim, uint32_t poly_size,
|
||||
uint32_t level, uint32_t base_log, uint32_t glwe_dim, uint32_t bsk_index,
|
||||
mlir::concretelang::RuntimeContext *context) {
|
||||
assert(bsk_index == 0 && "multiple bsk is not yet implemented on GPU");
|
||||
assert(out_size0 == ct0_size0);
|
||||
assert(out_size1 == glwe_dim * poly_size + 1);
|
||||
// TODO: Multi GPU
|
||||
@@ -247,7 +245,6 @@ void memref_batched_mapped_bootstrap_lwe_cuda_u64(
|
||||
uint32_t input_lwe_dim, uint32_t poly_size, uint32_t level,
|
||||
uint32_t base_log, uint32_t glwe_dim, uint32_t bsk_index,
|
||||
mlir::concretelang::RuntimeContext *context) {
|
||||
assert(bsk_index == 0 && "multiple bsk is not yet implemented on GPU");
|
||||
assert(out_size0 == ct0_size0);
|
||||
assert(out_size1 == glwe_dim * poly_size + 1);
|
||||
assert((out_size0 == tlu_size0 || tlu_size0 == 1) &&
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "concretelang/Common/Protocol.h"
|
||||
#include "concretelang/Common/Transformers.h"
|
||||
#include "concretelang/Common/Values.h"
|
||||
#include "concretelang/Runtime/DFRuntime.hpp"
|
||||
#include "concretelang/Runtime/context.h"
|
||||
#include "concretelang/ServerLib/ServerLib.h"
|
||||
#include "concretelang/Support/CompilerEngine.h"
|
||||
@@ -355,6 +356,7 @@ DynamicModule::open(const std::string &sharedLibPath) {
|
||||
if (!module->libraryHandle) {
|
||||
return StringError("Cannot open shared library ") << dlerror();
|
||||
}
|
||||
mlir::concretelang::dfr::_dfr_register_lib(module->libraryHandle);
|
||||
return module;
|
||||
}
|
||||
|
||||
@@ -430,6 +432,13 @@ bool getGateIsSigned(const Message<concreteprotocol::GateInfo> &gateInfo) {
|
||||
Result<std::vector<TransportValue>>
|
||||
ServerCircuit::call(const ServerKeyset &serverKeyset,
|
||||
std::vector<TransportValue> &args) {
|
||||
std::vector<TransportValue> returns(returnsBuffer.size());
|
||||
mlir::concretelang::dfr::_dfr_register_lib(dynamicModule->libraryHandle);
|
||||
if (!mlir::concretelang::dfr::_dfr_is_root_node()) {
|
||||
mlir::concretelang::dfr::_dfr_run_remote_scheduler();
|
||||
return returns;
|
||||
}
|
||||
|
||||
if (args.size() != argsBuffer.size()) {
|
||||
return StringError("Called circuit with wrong number of arguments");
|
||||
}
|
||||
@@ -444,7 +453,6 @@ ServerCircuit::call(const ServerKeyset &serverKeyset,
|
||||
invoke(serverKeyset);
|
||||
|
||||
// We process the return values to turn them into transport values.
|
||||
std::vector<TransportValue> returns(returnsBuffer.size());
|
||||
for (size_t i = 0; i < returnsBuffer.size(); i++) {
|
||||
OUTCOME_TRY(returns[i], returnTransformers[i](returnsBuffer[i]));
|
||||
}
|
||||
|
||||
@@ -63,14 +63,10 @@
|
||||
#include "concretelang/Support/LLVMEmitFile.h"
|
||||
#include "concretelang/Support/Pipeline.h"
|
||||
#include "concretelang/Support/Utils.h"
|
||||
#include <concretelang/Runtime/GPUDFG.hpp>
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
// TODO: should be removed when bufferization is not related to CAPI lowering
|
||||
// Control whether we should call a cpu or gpu function when lowering
|
||||
// to CAPI
|
||||
static bool EMIT_GPU_OPS;
|
||||
bool getEmitGPUOption() { return EMIT_GPU_OPS; }
|
||||
|
||||
/// Creates a new compilation context that can be shared across
|
||||
/// compilation engines and results
|
||||
@@ -297,9 +293,6 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,
|
||||
|
||||
mlir::MLIRContext &mlirContext = *this->compilationContext->getMLIRContext();
|
||||
|
||||
// enable/disable usage of gpu functions during bufferization
|
||||
EMIT_GPU_OPS = options.emitGPUOps;
|
||||
|
||||
auto dataflowParallelize =
|
||||
options.autoParallelize || options.dataflowParallelize;
|
||||
auto loopParallelize = options.autoParallelize || options.loopParallelize;
|
||||
@@ -307,6 +300,56 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,
|
||||
if (loopParallelize)
|
||||
mlir::concretelang::dfr::_dfr_set_use_omp(true);
|
||||
|
||||
// Sanity checks for enabling GPU usage: the compiler must have been
|
||||
// compiled with Cuda support (especially important when building
|
||||
// python wheels), and at least one device must be available to
|
||||
// execute on.
|
||||
if (options.emitGPUOps) {
|
||||
// If this compiler is not compiled using Cuda support, then
|
||||
// requesting GPU is forbidden - instead of a hard error, issue a
|
||||
// warning and disable the GPU option.
|
||||
if (!mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled()) {
|
||||
// Allow compilation to complete if only code generation is expected.
|
||||
if (target != Target::LIBRARY) {
|
||||
warnx("This instance of the Concrete compiler does not support GPU "
|
||||
"acceleration."
|
||||
" Allowing code generation to proceed, but execution will not be "
|
||||
"possible.");
|
||||
} else {
|
||||
warnx("This instance of the Concrete compiler does not support GPU "
|
||||
"acceleration."
|
||||
" If you are using Concrete-Python, it means that the module "
|
||||
"installed is not GPU enabled.\n"
|
||||
"Continuing without GPU acceleration.");
|
||||
options.emitGPUOps = false;
|
||||
options.emitSDFGOps = false;
|
||||
options.batchTFHEOps = false;
|
||||
}
|
||||
} else {
|
||||
// Ensure that at least one Cuda device is available if GPU option
|
||||
// is used
|
||||
if (!mlir::concretelang::gpu_dfg::check_cuda_device_available()) {
|
||||
warnx("No Cuda device available on this system (either not present or "
|
||||
"the driver is not online).\n"
|
||||
"Continuing without GPU acceleration.");
|
||||
options.emitGPUOps = false;
|
||||
options.emitSDFGOps = false;
|
||||
options.batchTFHEOps = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Finally for now we cannot allow dataflow parallelization at the
|
||||
// same time as GPU usage. This restriction will be relaxed later.
|
||||
if (dataflowParallelize) {
|
||||
warnx("Dataflow parallelization and GPU offloading have both been "
|
||||
"requested. This is not currently supported. Continuing without "
|
||||
"dataflow parallelization.");
|
||||
dataflowParallelize = false;
|
||||
}
|
||||
}
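Condensed, the gating above boils down to: when the build lacks CUDA support, non-library targets may still generate code (with a warning that it cannot be executed) while library builds fall back to CPU; when the build has CUDA support but no device is visible, fall back to CPU as well; and dataflow parallelization is dropped whenever GPU offloading was requested. A compact sketch of that decision logic, illustrative only (`Options` is a hypothetical stand-in for the CompilationOptions fields involved):

#include <concretelang/Runtime/GPUDFG.hpp>

// Illustrative restatement of the gating above; not the actual
// CompilerEngine code.
struct Options {
  bool emitGPUOps = false;
  bool emitSDFGOps = false;
  bool batchTFHEOps = false;
};

static void gate_gpu_options(Options &o, bool &dataflowParallelize,
                             bool generatingLibrary) {
  using namespace mlir::concretelang::gpu_dfg;
  if (!o.emitGPUOps)
    return;
  if (!check_cuda_runtime_enabled()) {
    // Non-library targets may still generate code (it just cannot run);
    // library builds fall back to the CPU pipeline.
    if (generatingLibrary)
      o.emitGPUOps = o.emitSDFGOps = o.batchTFHEOps = false;
  } else if (!check_cuda_device_available()) {
    // CUDA-enabled build, but no usable device on this machine.
    o.emitGPUOps = o.emitSDFGOps = o.batchTFHEOps = false;
  }
  // Dataflow parallelization is not supported together with GPU
  // offloading yet, so it is dropped whenever GPU use was requested.
  if (dataflowParallelize)
    dataflowParallelize = false;
}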
|
||||
|
||||
// If dataflow parallelization will proceed, mark it for
|
||||
// initialising the runtime
|
||||
if (dataflowParallelize)
|
||||
mlir::concretelang::dfr::_dfr_set_required(true);
|
||||
|
||||
@@ -594,8 +637,11 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,
|
||||
}
|
||||
}
|
||||
|
||||
// Restrict direct lowering when already generating GPU code through
|
||||
// the SDFG dialect.
|
||||
bool lowerDirectlyToGPUOps = (options.emitGPUOps && !options.emitSDFGOps);
|
||||
if (mlir::concretelang::pipeline::lowerToCAPI(mlirContext, module, enablePass,
|
||||
options.emitGPUOps)
|
||||
lowerDirectlyToGPUOps)
|
||||
.failed()) {
|
||||
return StreamStringError("Failed to lower to CAPI");
|
||||
}
|
||||
|
||||
@@ -47,8 +47,10 @@
|
||||
#include "concretelang/Dialect/FHELinalg/Transforms/Tiling.h"
|
||||
#include "concretelang/Dialect/RT/Analysis/Autopar.h"
|
||||
#include "concretelang/Dialect/RT/Transforms/Passes.h"
|
||||
#include "concretelang/Dialect/SDFG/Transforms/Passes.h"
|
||||
#include "concretelang/Dialect/TFHE/Analysis/ExtractStatistics.h"
|
||||
#include "concretelang/Dialect/TFHE/Transforms/Transforms.h"
|
||||
#include "concretelang/Runtime/utils.h"
|
||||
#include "concretelang/Support/CompilerEngine.h"
|
||||
#include "concretelang/Support/Error.h"
|
||||
#include "concretelang/Support/Pipeline.h"
|
||||
@@ -361,6 +363,7 @@ mlir::LogicalResult batchTFHE(mlir::MLIRContext &context,
|
||||
pm, mlir::concretelang::createCollapseParallelLoops(), enablePass);
|
||||
addPotentiallyNestedPass(
|
||||
pm, mlir::concretelang::createBatchingPass(maxBatchSize), enablePass);
|
||||
addPotentiallyNestedPass(pm, mlir::createCanonicalizerPass(), enablePass);
|
||||
|
||||
return pm.run(module.getOperation());
|
||||
}
|
||||
@@ -569,6 +572,8 @@ mlir::LogicalResult lowerToStd(mlir::MLIRContext &context,
|
||||
enablePass);
|
||||
addPotentiallyNestedPass(
|
||||
pm, mlir::concretelang::createFixupBufferDeallocationPass(), enablePass);
|
||||
addPotentiallyNestedPass(
|
||||
pm, mlir::concretelang::createSDFGBufferOwnershipPass(), enablePass);
|
||||
|
||||
return pm.run(module);
|
||||
}
|
||||
@@ -610,8 +615,7 @@ std::unique_ptr<llvm::Module>
|
||||
lowerLLVMDialectToLLVMIR(mlir::MLIRContext &context,
|
||||
llvm::LLVMContext &llvmContext,
|
||||
mlir::ModuleOp &module) {
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
mlir::concretelang::LLVMInitializeNativeTarget();
|
||||
mlir::registerLLVMDialectTranslation(*module->getContext());
|
||||
mlir::registerOpenMPDialectTranslation(*module->getContext());
|
||||
|
||||
|
||||
@@ -23,39 +23,26 @@ public:
|
||||
matchAndRewrite(mlir::scf::ForOp forOp,
|
||||
mlir::PatternRewriter &rewriter) const override {
|
||||
auto attr = forOp->getAttrOfType<mlir::BoolAttr>("parallel");
|
||||
if (attr == nullptr) {
|
||||
|
||||
if (!attr || !attr.getValue()) {
|
||||
return mlir::failure();
|
||||
}
|
||||
|
||||
assert(forOp.getRegionIterArgs().size() == 0 &&
|
||||
"unexpecting iter args when loops are bufferized");
|
||||
if (attr.getValue()) {
|
||||
rewriter.replaceOpWithNewOp<mlir::scf::ParallelOp>(
|
||||
forOp, mlir::ValueRange{forOp.getLowerBound()},
|
||||
mlir::ValueRange{forOp.getUpperBound()}, forOp.getStep(),
|
||||
std::nullopt,
|
||||
[&](mlir::OpBuilder &builder, mlir::Location location,
|
||||
mlir::ValueRange indVar, mlir::ValueRange iterArgs) {
|
||||
mlir::IRMapping map;
|
||||
map.map(forOp.getInductionVar(), indVar.front());
|
||||
for (auto &op : forOp.getRegion().front()) {
|
||||
auto newOp = builder.clone(op, map);
|
||||
map.map(op.getResults(), newOp->getResults());
|
||||
}
|
||||
});
|
||||
} else {
|
||||
rewriter.replaceOpWithNewOp<mlir::scf::ForOp>(
|
||||
forOp, forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep(),
|
||||
std::nullopt,
|
||||
[&](mlir::OpBuilder &builder, mlir::Location location,
|
||||
mlir::Value indVar, mlir::ValueRange iterArgs) {
|
||||
mlir::IRMapping map;
|
||||
map.map(forOp.getInductionVar(), indVar);
|
||||
for (auto &op : forOp.getRegion().front()) {
|
||||
auto newOp = builder.clone(op, map);
|
||||
map.map(op.getResults(), newOp->getResults());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
rewriter.replaceOpWithNewOp<mlir::scf::ParallelOp>(
|
||||
forOp, mlir::ValueRange{forOp.getLowerBound()},
|
||||
mlir::ValueRange{forOp.getUpperBound()}, forOp.getStep(), std::nullopt,
|
||||
[&](mlir::OpBuilder &builder, mlir::Location location,
|
||||
mlir::ValueRange indVar, mlir::ValueRange iterArgs) {
|
||||
mlir::IRMapping map;
|
||||
map.map(forOp.getInductionVar(), indVar.front());
|
||||
for (auto &op : forOp.getRegion().front()) {
|
||||
auto newOp = builder.clone(op, map);
|
||||
map.map(op.getResults(), newOp->getResults());
|
||||
}
|
||||
});
|
||||
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
@@ -122,8 +122,7 @@ func.func @batch_offset_extract_keyswitch(%arg0: tensor<99x2x3x4x99x99x!TFHE.glw
|
||||
%c97 = arith.constant 97 : index
|
||||
|
||||
%0 = bufferization.alloc_tensor() : tensor<2x3x4x!TFHE.glwe<sk<1,1,750>>>
|
||||
// CHECK: %[[VDROP1DIMS:.*]] = tensor.collapse_shape [[ARG:.*]] {{\[\[0, 1\], \[2\], \[3, 4, 5\]\]}} : tensor<1x2x3x4x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<2x3x4x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
|
||||
// CHECK: %[[V0:.*]] = tensor.collapse_shape %[[VDROP1DIMS]] {{\[\[0, 1, 2\]\]}} : tensor<2x3x4x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<24x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
|
||||
// CHECK: %[[V0:.*]] = tensor.collapse_shape %[[SLICE:.*]] {{\[\[0, 1, 2, 3, 4, 5\]\]}} : tensor<1x2x3x4x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<24x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
|
||||
// CHECK: %[[V1:.*]] = "TFHE.batched_keyswitch_glwe"(%[[V0]]) {key = #TFHE<ksk{{\[}}[[KSK:.*]]{{\]}}<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>, sk{{\[}}[[SK_OUT]]{{\]}}<1,750>, 3, 4>>} : (tensor<24x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>) -> tensor<24x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
|
||||
// CHECK: %[[V2:.*]] = tensor.expand_shape %[[V1]] {{\[\[0, 1, 2\]\]}} : tensor<24x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>> into tensor<2x3x4x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
|
||||
// CHECK: return %[[V2]]
|
||||
@@ -161,8 +160,7 @@ func.func @batch_offset_shifted_bounds_nonunitstep_extract_keyswitch(%arg0: tens
|
||||
%0 = bufferization.alloc_tensor() : tensor<2x2x2x!TFHE.glwe<sk<1,1,750>>>
|
||||
|
||||
// CHECK: %[[V1:.*]] = tensor.extract_slice %arg0{{\[0, 3, 7, 9, 97, 1\] \[1, 2, 2, 2, 1, 1\] \[1, 2, 1, 7, 1, 1\]}} : tensor<99x20x30x40x99x99x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> to tensor<1x2x2x2x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
|
||||
// CHECK-NEXT: %[[V2:.*]] = tensor.collapse_shape %[[V1]] {{\[\[0, 1\], \[2\], \[3, 4, 5\]\]}} : tensor<1x2x2x2x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
|
||||
// CHECK-NEXT: %[[V3:.*]] = tensor.collapse_shape %[[V2]] {{\[\[0, 1, 2\]\]}} : tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<8x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
|
||||
// CHECK-NEXT: %[[V3:.*]] = tensor.collapse_shape %[[V1]] {{\[\[0, 1, 2, 3, 4, 5\]\]}} : tensor<1x2x2x2x1x1x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>> into tensor<8x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>
|
||||
// CHECK-NEXT: %[[V4:.*]] = "TFHE.batched_keyswitch_glwe"(%[[V3]]) {key = #TFHE<ksk{{\[}}[[KSK:.*]]{{\]}}<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>, sk{{\[}}[[SK_OUT]]{{\]}}<1,750>, 3, 4>>} : (tensor<8x!TFHE.glwe<sk{{\[}}[[SK_IN]]{{\]}}<1,2048>>>) -> tensor<8x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
|
||||
// CHECK-NEXT: %[[V5:.*]] = tensor.expand_shape %[[V4]] {{\[\[0, 1, 2\]\]}} : tensor<8x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>> into tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
|
||||
// CHECK-NEXT: return %[[V5]] : tensor<2x2x2x!TFHE.glwe<sk{{\[}}[[SK_OUT]]{{\]}}<1,750>>>
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
// RUN: concretecompiler --split-input-file --action=dump-std --parallelize --parallelize-loops --skip-program-info --passes=for-loop-to-parallel %s 2>&1| FileCheck %s
|
||||
|
||||
func.func @bar() -> () {
|
||||
%c0 = arith.constant 0 : index
|
||||
%c1 = arith.constant 1 : index
|
||||
%c4 = arith.constant 4 : index
|
||||
%i0 = arith.constant 0 : i32
|
||||
%i1 = arith.constant 1 : i32
|
||||
|
||||
// CHECK-NOT: scf.parallel
|
||||
%0 = scf.for %iv = %c0 to %c4 step %c1 iter_args(%ia = %i0) -> i32 {
|
||||
"Tracing.trace_plaintext"(%i0) : (i32) -> ()
|
||||
%yld = arith.addi %ia, %i1 : i32
|
||||
scf.yield %yld : i32
|
||||
} {"parallel" = false }
|
||||
|
||||
return
|
||||
}
|
||||
@@ -54,15 +54,13 @@ static void BM_ExportArguments(benchmark::State &state,
|
||||
inputArguments.reserve(test.inputs.size());
|
||||
|
||||
auto client = tc.getClientCircuit().value();
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
|
||||
for (auto _ : state) {
|
||||
for (size_t i = 0; i < test.inputs.size(); i++) {
|
||||
auto input = client.prepareInput(test.inputs[i].getValue(), i).value();
|
||||
inputArguments.push_back(input);
|
||||
}
|
||||
for (auto _ : state) {
|
||||
for (size_t i = 0; i < test.inputs.size(); i++) {
|
||||
auto input = client.prepareInput(test.inputs[i].getValue(), i).value();
|
||||
inputArguments.push_back(input);
|
||||
}
|
||||
inputArguments.resize(0);
|
||||
}
|
||||
inputArguments.resize(0);
|
||||
}
|
||||
|
||||
/// Benchmark time of the program evaluation
|
||||
@@ -78,12 +76,10 @@ static void BM_Evaluate(benchmark::State &state, EndToEndDesc description,
|
||||
auto inputArguments = std::vector<TransportValue>();
|
||||
inputArguments.reserve(test.inputs.size());
|
||||
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
|
||||
for (size_t i = 0; i < test.inputs.size(); i++) {
|
||||
auto input =
|
||||
clientCircuit.prepareInput(test.inputs[i].getValue(), i).value();
|
||||
inputArguments.push_back(input);
|
||||
}
|
||||
for (size_t i = 0; i < test.inputs.size(); i++) {
|
||||
auto input =
|
||||
clientCircuit.prepareInput(test.inputs[i].getValue(), i).value();
|
||||
inputArguments.push_back(input);
|
||||
}
|
||||
|
||||
auto serverCircuit = tc.getServerCircuit().value();
|
||||
|
||||
@@ -26,14 +26,14 @@ program: |
|
||||
p-error: 1e-06
|
||||
tests:
|
||||
- inputs:
|
||||
- tensor: [-2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -2, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -2, -2, -2]
|
||||
- tensor: [-1, -1, -2, -2, -1, -2, -2, -1, -2, -1, -2, -1, -1, -2, -2, -2, -2, -1, -2, -1, -1, -2, -1, -1]
|
||||
shape: [2,3,4]
|
||||
signed: True
|
||||
- tensor: [-2, -1, -2, -1, -1, -1, -2, -2, -1, -2, -2, -1, -1, -1, -2, -1]
|
||||
- tensor: [-1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -2, -1, -2, -1, -2]
|
||||
shape: [2,4,2]
|
||||
signed: True
|
||||
outputs:
|
||||
- tensor: [11, 8, 9, 6, 8, 6, 9, 7, 6, 5, 12, 10]
|
||||
- tensor: [8, 8, 8, 8, 8, 8, 9, 13, 7, 10, 7, 9]
|
||||
shape: [2,3,2]
|
||||
signed: True
|
||||
---
|
||||
@@ -63,14 +63,14 @@ program: |
|
||||
p-error: 1e-06
|
||||
tests:
|
||||
- inputs:
|
||||
- tensor: [-1, -2, -2, -1, -2, -1, -2, -2, -1, -1, -1, -1]
|
||||
- tensor: [-2, -1, -1, -1, -1, -2, -1, -1, -1, -1, -2, -2]
|
||||
shape: [3,4]
|
||||
signed: True
|
||||
- tensor: [-2, -2, -2, -1, -1, -2, -1, -1]
|
||||
- tensor: [-2, -2, -1, -1, -1, -2, -2, -2]
|
||||
shape: [4,2]
|
||||
signed: True
|
||||
outputs:
|
||||
- tensor: [9, 9, 10, 11, 6, 6]
|
||||
- tensor: [8, 9, 7, 8, 9, 11]
|
||||
shape: [3,2]
|
||||
signed: True
|
||||
---
|
||||
@@ -100,14 +100,14 @@ program: |
|
||||
p-error: 1e-06
|
||||
tests:
|
||||
- inputs:
|
||||
- tensor: [-1, -2, -1]
|
||||
- tensor: [-1, -2, -2]
|
||||
shape: [3]
|
||||
signed: True
|
||||
- tensor: [-2, -1, -2, -2, -2, -2, -2, -1, -1, -1, -2, -2, -2, -2, -2, -1, -2, -2, -1, -1, -1, -2, -2, -1]
|
||||
- tensor: [-2, -2, -1, -2, -1, -1, -2, -1, -2, -2, -1, -2, -2, -2, -2, -1, -1, -2, -1, -1, -1, -1, -2, -1]
|
||||
shape: [4,3,2]
|
||||
signed: True
|
||||
outputs:
|
||||
- tensor: [8, 7, 6, 5, 8, 6, 5, 6]
|
||||
- tensor: [6, 8, 8, 9, 8, 8, 7, 5]
|
||||
shape: [4,2]
|
||||
signed: True
|
||||
---
|
||||
@@ -137,14 +137,14 @@ program: |
|
||||
p-error: 1e-06
|
||||
tests:
|
||||
- inputs:
|
||||
- tensor: [-1, -2, -1, -2, -2, -2, -2, -1, -2, -2, -2, -2, -1, -2, -1, -1, -2, -2, -1, -1, -2, -2, -2, -2]
|
||||
- tensor: [-2, -1, -1, -2, -2, -1, -1, -2, -1, -2, -2, -2, -1, -2, -2, -2, -2, -1, -1, -2, -2, -2, -1, -1]
|
||||
shape: [2,3,4]
|
||||
signed: True
|
||||
- tensor: [-1, -2, -1, -1]
|
||||
- tensor: [-2, -2, -2, -2]
|
||||
shape: [4]
|
||||
signed: True
|
||||
outputs:
|
||||
- tensor: [8, 9, 10, 7, 8, 10]
|
||||
- tensor: [12, 12, 14, 14, 12, 12]
|
||||
shape: [2,3]
|
||||
signed: True
|
||||
---
|
||||
@@ -174,14 +174,14 @@ program: |
|
||||
p-error: 1e-06
|
||||
tests:
|
||||
- inputs:
|
||||
- tensor: [-2, -1, -2, -1, -1, -1, -2, -2, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -1, -2, -2, -1, -2, -2]
|
||||
- tensor: [-2, -2, -2, -1, -1, -1, -2, -1, -1, -2, -2, -2, -1, -1, -2, -2, -2, -1, -1, -1, -1, -2, -1, -2]
|
||||
shape: [2,1,3,4]
|
||||
signed: True
|
||||
- tensor: [-2, -1, -2, -2, -2, -1, -1, -1, -2, -2, -1, -1, -2, -2, -2, -1, -2, -1, -2, -2, -2, -1, -2, -2, -1, -2, -2, -2, -2, -2, -1, -1, -1, -2, -2, -1, -2, -1, -1, -1]
|
||||
- tensor: [-2, -1, -2, -1, -2, -2, -1, -2, -1, -2, -2, -2, -2, -2, -1, -2, -1, -1, -1, -2, -2, -1, -2, -2, -2, -2, -2, -1, -2, -1, -1, -2, -1, -2, -1, -1, -1, -1, -2, -1]
|
||||
shape: [5,4,2]
|
||||
signed: True
|
||||
outputs:
|
||||
- tensor: [11, 7, 10, 7, 13, 9, 11, 10, 11, 9, 12, 11, 12, 8, 12, 9, 14, 10, 9, 11, 9, 10, 11, 13, 9, 8, 9, 7, 11, 9, 11, 7, 8, 6, 12, 8, 11, 10, 9, 7, 13, 11, 12, 8, 10, 8, 14, 10, 9, 11, 7, 8, 10, 12, 9, 8, 7, 6, 10, 9]
|
||||
- tensor: [13, 10, 9, 8, 12, 11, 11, 14, 8, 10, 11, 14, 10, 10, 8, 7, 11, 11, 13, 10, 9, 7, 12, 10, 8, 9, 6, 6, 9, 8, 10, 10, 9, 7, 10, 9, 9, 12, 7, 10, 9, 12, 10, 9, 7, 7, 9, 10, 10, 9, 9, 8, 10, 9, 8, 7, 6, 7, 8, 7]
|
||||
shape: [2,5,3,2]
|
||||
signed: True
|
||||
---
|
||||
@@ -212,10 +212,10 @@ program: |
|
||||
p-error: 1e-06
|
||||
tests:
|
||||
- inputs:
|
||||
- tensor: [-1, -2, -2]
|
||||
- tensor: [-2, -2, -1]
|
||||
shape: [3]
|
||||
signed: True
|
||||
- tensor: [-1, -1, -2]
|
||||
- tensor: [-2, -1, -1]
|
||||
shape: [3]
|
||||
signed: True
|
||||
outputs:
|
||||
|
||||
@@ -28,9 +28,3 @@ add_concretecompiler_unittest(end_to_end_jit_test end_to_end_jit_test.cc globals
|
||||
add_concretecompiler_unittest(end_to_end_test end_to_end_test.cc globals.cc)
|
||||
|
||||
add_concretecompiler_unittest(end_to_end_jit_lambda end_to_end_jit_lambda.cc globals.cc)
|
||||
|
||||
if(CONCRETELANG_DATAFLOW_EXECUTION_ENABLED)
|
||||
add_concretecompiler_unittest(end_to_end_jit_auto_parallelization end_to_end_jit_auto_parallelization.cc globals.cc)
|
||||
add_concretecompiler_unittest(end_to_end_jit_distributed end_to_end_jit_distributed.cc globals.cc)
|
||||
add_concretecompiler_unittest(end_to_end_jit_aes_short end_to_end_jit_aes_short.cc globals.cc)
|
||||
endif()
|
||||
|
||||
File diff suppressed because it is too large
@@ -1,167 +0,0 @@
|
||||
|
||||
#include <concretelang/Runtime/DFRuntime.hpp>
|
||||
#include <cstdint>
|
||||
#include <gtest/gtest.h>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
#include "concretelang/TestLib/TestProgram.h"
|
||||
#include "end_to_end_jit_test.h"
|
||||
#include "tests_tools/GtestEnvironment.h"
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Auto-parallelize independent FHE ops /////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(ParallelizeAndRunFHE, add_eint_tree) {
|
||||
checkedJit(testCircuit, R"XXX(
|
||||
func.func @main(%arg0: !FHE.eint<7>, %arg1: !FHE.eint<7>, %arg2: !FHE.eint<7>, %arg3: !FHE.eint<7>) -> !FHE.eint<7> {
|
||||
%1 = "FHE.add_eint"(%arg0, %arg1): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%2 = "FHE.add_eint"(%arg0, %arg2): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%3 = "FHE.add_eint"(%arg0, %arg3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%4 = "FHE.add_eint"(%arg1, %arg2): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%5 = "FHE.add_eint"(%arg1, %arg3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%6 = "FHE.add_eint"(%arg2, %arg3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
|
||||
%7 = "FHE.add_eint"(%1, %2): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%8 = "FHE.add_eint"(%1, %3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%9 = "FHE.add_eint"(%1, %4): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%10 = "FHE.add_eint"(%1, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%11 = "FHE.add_eint"(%1, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%12 = "FHE.add_eint"(%2, %3): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%13 = "FHE.add_eint"(%2, %4): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%14 = "FHE.add_eint"(%2, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%15 = "FHE.add_eint"(%2, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%16 = "FHE.add_eint"(%3, %4): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%17 = "FHE.add_eint"(%3, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%18 = "FHE.add_eint"(%3, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%19 = "FHE.add_eint"(%4, %5): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%20 = "FHE.add_eint"(%4, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%21 = "FHE.add_eint"(%5, %6): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
|
||||
%22 = "FHE.add_eint"(%7, %8): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%23 = "FHE.add_eint"(%9, %10): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%24 = "FHE.add_eint"(%11, %12): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%25 = "FHE.add_eint"(%13, %14): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%26 = "FHE.add_eint"(%15, %16): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%27 = "FHE.add_eint"(%17, %18): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%28 = "FHE.add_eint"(%19, %20): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
|
||||
%29 = "FHE.add_eint"(%22, %23): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%30 = "FHE.add_eint"(%24, %25): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%31 = "FHE.add_eint"(%26, %27): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%32 = "FHE.add_eint"(%21, %28): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
|
||||
%33 = "FHE.add_eint"(%29, %30): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
%34 = "FHE.add_eint"(%31, %32): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
|
||||
%35 = "FHE.add_eint"(%33, %34): (!FHE.eint<7>, !FHE.eint<7>) -> (!FHE.eint<7>)
|
||||
return %35: !FHE.eint<7>
|
||||
}
|
||||
)XXX",
|
||||
"main", false, true, false, false, 1e-40);
|
||||
|
||||
auto lambda = [&](std::vector<concretelang::values::Value> args) {
|
||||
return testCircuit.call(args)
|
||||
.value()[0]
|
||||
.template getTensor<uint64_t>()
|
||||
.value()[0];
|
||||
};
|
||||
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
|
||||
ASSERT_EQ(lambda({Tensor<uint64_t>(1), Tensor<uint64_t>(2),
|
||||
Tensor<uint64_t>(3), Tensor<uint64_t>(4)}),
|
||||
(uint64_t)150);
|
||||
ASSERT_EQ(lambda({Tensor<uint64_t>(4), Tensor<uint64_t>(5),
|
||||
Tensor<uint64_t>(6), Tensor<uint64_t>(7)}),
|
||||
(uint64_t)74);
|
||||
ASSERT_EQ(lambda({Tensor<uint64_t>(1), Tensor<uint64_t>(1),
|
||||
Tensor<uint64_t>(1), Tensor<uint64_t>(1)}),
|
||||
(uint64_t)60);
|
||||
ASSERT_EQ(lambda({Tensor<uint64_t>(5), Tensor<uint64_t>(7),
|
||||
Tensor<uint64_t>(11), Tensor<uint64_t>(13)}),
|
||||
(uint64_t)28);
|
||||
} else {
|
||||
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
|
||||
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
|
||||
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
|
||||
ASSERT_OUTCOME_HAS_FAILURE(testCircuit.call({}));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint64_t> parallel_results;
|
||||
|
||||
TEST(ParallelizeAndRunFHE, nn_small_parallel) {
|
||||
checkedJit(lambda, R"XXX(
|
||||
func.func @main(%arg0: tensor<4x5x!FHE.eint<5>>) -> tensor<4x7x!FHE.eint<5>> {
|
||||
%cst = arith.constant dense<[[0, 0, 1, 0, 1, 1, 0], [1, 1, 1, 0, 1, 0, 0], [1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1]]> : tensor<4x7xi6>
|
||||
%cst_0 = arith.constant dense<[[1, 0, 1, 1, 0, 1, 1], [0, 1, 0, 0, 0, 0, 1], [0, 1, 1, 1, 1, 0, 0], [0, 1, 1, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 1]]> : tensor<5x7xi6>
|
||||
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<4x5x!FHE.eint<5>>, tensor<5x7xi6>) -> tensor<4x7x!FHE.eint<5>>
|
||||
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<4x7x!FHE.eint<5>>, tensor<4x7xi6>) -> tensor<4x7x!FHE.eint<5>>
|
||||
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52, 56, 59, 63, 66, 70, 73, 77, 80, 84, 87, 91, 94, 98, 101, 105, 108]> : tensor<32xi64>
|
||||
%2 = "FHELinalg.apply_lookup_table"(%1, %cst_1) : (tensor<4x7x!FHE.eint<5>>, tensor<32xi64>) -> tensor<4x7x!FHE.eint<5>>
|
||||
return %2 : tensor<4x7x!FHE.eint<5>>
|
||||
}
|
||||
)XXX",
|
||||
"main", false, true, true);
|
||||
|
||||
const size_t dim0 = 4;
|
||||
const size_t dim1 = 5;
|
||||
const size_t dim2 = 7;
|
||||
const std::vector<size_t> inputShape({dim0, dim1});
|
||||
const std::vector<size_t> outputShape({dim0, dim2});
|
||||
std::vector<uint64_t> values;
|
||||
values.reserve(dim0 * dim1);
|
||||
for (size_t i = 0; i < dim0 * dim1; ++i) {
|
||||
values.push_back(i % 17 % 4);
|
||||
}
|
||||
auto input = Tensor<uint64_t>(values, inputShape);
|
||||
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
|
||||
auto maybeResult = lambda.call({input});
|
||||
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
|
||||
auto result = maybeResult.value()[0].template getTensor<uint64_t>().value();
|
||||
ASSERT_EQ(result.dimensions, outputShape);
|
||||
parallel_results = result.values;
|
||||
} else {
|
||||
ASSERT_OUTCOME_HAS_FAILURE(lambda.call({}));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(ParallelizeAndRunFHE, nn_small_sequential) {
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
|
||||
checkedJit(lambda, R"XXX(
|
||||
func.func @main(%arg0: tensor<4x5x!FHE.eint<5>>) -> tensor<4x7x!FHE.eint<5>> {
|
||||
%cst = arith.constant dense<[[0, 0, 1, 0, 1, 1, 0], [1, 1, 1, 0, 1, 0, 0], [1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1]]> : tensor<4x7xi6>
|
||||
%cst_0 = arith.constant dense<[[1, 0, 1, 1, 0, 1, 1], [0, 1, 0, 0, 0, 0, 1], [0, 1, 1, 1, 1, 0, 0], [0, 1, 1, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 1]]> : tensor<5x7xi6>
|
||||
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<4x5x!FHE.eint<5>>, tensor<5x7xi6>) -> tensor<4x7x!FHE.eint<5>>
|
||||
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<4x7x!FHE.eint<5>>, tensor<4x7xi6>) -> tensor<4x7x!FHE.eint<5>>
|
||||
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52, 56, 59, 63, 66, 70, 73, 77, 80, 84, 87, 91, 94, 98, 101, 105, 108]> : tensor<32xi64>
|
||||
%2 = "FHELinalg.apply_lookup_table"(%1, %cst_1) : (tensor<4x7x!FHE.eint<5>>, tensor<32xi64>) -> tensor<4x7x!FHE.eint<5>>
|
||||
return %2 : tensor<4x7x!FHE.eint<5>>
|
||||
}
|
||||
)XXX",
|
||||
"main", false, false, false);
|
||||
|
||||
const size_t dim0 = 4;
|
||||
const size_t dim1 = 5;
|
||||
const size_t dim2 = 7;
|
||||
const std::vector<size_t> inputShape({dim0, dim1});
|
||||
const std::vector<size_t> outputShape({dim0, dim2});
|
||||
std::vector<uint64_t> values;
|
||||
values.reserve(dim0 * dim1);
|
||||
for (size_t i = 0; i < dim0 * dim1; ++i) {
|
||||
values.push_back(i % 17 % 4);
|
||||
}
|
||||
auto input = Tensor<uint64_t>(values, inputShape);
|
||||
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
|
||||
auto maybeResult = lambda.call({input});
|
||||
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
|
||||
auto result =
|
||||
maybeResult.value()[0].template getTensor<uint64_t>().value();
|
||||
for (size_t i = 0; i < dim0 * dim2; i++)
|
||||
EXPECT_EQ(parallel_results[i], result.values[i])
|
||||
<< "result differ at pos " << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,148 +0,0 @@
|
||||
#include <concretelang/Runtime/DFRuntime.hpp>
|
||||
#include <cstdint>
|
||||
#include <gtest/gtest.h>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
#include "concretelang/TestLib/TestProgram.h"
|
||||
#include "end_to_end_jit_test.h"
|
||||
#include "tests_tools/GtestEnvironment.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Auto-parallelize independent FHE ops /////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::vector<uint64_t> distributed_results;
|
||||
|
||||
TEST(Distributed, nn_med_nested) {
|
||||
checkedJit(lambda, R"XXX(
|
||||
func.func @main(%arg0: tensor<200x4x!FHE.eint<4>>) -> tensor<200x8x!FHE.eint<4>> {
|
||||
%cst = arith.constant dense<"0x01010100010100000001010101000101010101010101010001000101000001010001010100000101000001000001010001000001010100010001000000010100010001010001000001000101010101000100010001000000000100010001000101000001000101010100010001000000000101000100000000000001000100000100000100000001010000010001000101000100010001000100000100000100010101010000000000000000010001010000000100000100010100000100000000010001000101000100000000000101010101000101010101010100010100010100000000000101010100000100010100000001000101000000010101000101000100000101010100010101010000010101010100010000000000000001010101000100010101000001010001010000010001010101000000000000000001000001000000010100000100000101010100010001000000000000010100010101000000010100000100010001010001000000000100010001000101010100010100000001010100010101010100010100010001000001000000000101000101010001000100000101010100000101010100000100010101000100000101000101010100010001000101010100010001010001010000010000010001010000000001000101010001000000000101000000010000010100010001000001000001010101000100010001010100000101000000010001000000000101000101000000010000000001000101010100010001000000000001010000010001000001010101000101010101010100000000000001000100000100000001000000010101010101000000000101010101000100000101000100000000000001000100000101000101010100010000000101000000000100000100000101010000010100000000010000000000010001000100000101010001010101000000000000010000010101010001000000010001010001010000000000000101000000010101010101000001010101000001000001010100000000010001010100000100000101000101010100010001010001000001000100000101000100010100000100010000000101000000010000010001010101010000000101000000010101000001010100000100010001000000000001010000000100010000000000000000000000000001010101010101010101000001010101000001010100000001000101010101010000010101000101010100010101010000010101010100000100000000000101010000000000010101010000000001000000010100000100000001000101010000000001000001000001010001010000010001000101010001010001010101000100010000000100000100010101000000000101010101010001000100000000000101010000010101000001010001010000000001010100000101000001010000000001010101000100010000010101000000000001000101000001010101000101000001000001000000010100010001000101010100010001010000000101000000010001000001000100000101010001000001000001000101010000010001000001000101000000000000000101010000010000000101010100010100010001010101010000000000010001000101010000000001010100000000010001010100010001000001000101000000010100010000010000010001010100010000010001010100010000010100010101010001000100010100010101000100000101010100000100010100000100000000010101000000010001000001010000000101000100000100010101000000010100000101000001010001010100010000000101010000000001010001000000010100010101010001000100010001000001010101000000010001000100000100010101000000000000010100010000000100000000010100010000000100000101010000010101000100010000010100000001000100000000000100000001010101010101000100010001000000010101010100000001000001000001010001000101010100000001010001010100010101000101000000010001010100010101000100000101000101000001000001000001000101010100010001010000000100000101010100000001000000000000010101000100010001000001000001000000000000010100000100000001"> : tensor<200x8xi5>
%cst_0 = arith.constant dense<[[1, 0, 0, 0, 1, 0, 0, 1], [0, 0, 1, 1, 0, 0, 0, 0], [1, 1, 0, 1, 1, 0, 1, 1], [1, 1, 0, 0, 1, 0, 1, 1]]> : tensor<4x8xi5>
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52]> : tensor<16xi64>
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<200x4x!FHE.eint<4>>, tensor<4x8xi5>) -> tensor<200x8x!FHE.eint<4>>
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<200x8x!FHE.eint<4>>, tensor<200x8xi5>) -> tensor<200x8x!FHE.eint<4>>

%res = "FHE.zero_tensor"() : () -> tensor<200x8x!FHE.eint<4>>

%slice_A = tensor.extract_slice %1[0, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_B = tensor.extract_slice %1[25, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_C = tensor.extract_slice %1[50, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_D = tensor.extract_slice %1[75, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_E = tensor.extract_slice %1[100, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_F = tensor.extract_slice %1[125, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_G = tensor.extract_slice %1[150, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_H = tensor.extract_slice %1[175, 0][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_I = tensor.extract_slice %1[0, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_J = tensor.extract_slice %1[25, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_K = tensor.extract_slice %1[50, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_L = tensor.extract_slice %1[75, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_M = tensor.extract_slice %1[100, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_N = tensor.extract_slice %1[125, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_O = tensor.extract_slice %1[150, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>
%slice_P = tensor.extract_slice %1[175, 4][25, 4][1, 1] : tensor<200x8x!FHE.eint<4>> to tensor<25x4x!FHE.eint<4>>

%part_A = "FHELinalg.apply_lookup_table"(%slice_A, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_B = "FHELinalg.apply_lookup_table"(%slice_B, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_C = "FHELinalg.apply_lookup_table"(%slice_C, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_D = "FHELinalg.apply_lookup_table"(%slice_D, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_E = "FHELinalg.apply_lookup_table"(%slice_E, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_F = "FHELinalg.apply_lookup_table"(%slice_F, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_G = "FHELinalg.apply_lookup_table"(%slice_G, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_H = "FHELinalg.apply_lookup_table"(%slice_H, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_I = "FHELinalg.apply_lookup_table"(%slice_I, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_J = "FHELinalg.apply_lookup_table"(%slice_J, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_K = "FHELinalg.apply_lookup_table"(%slice_K, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_L = "FHELinalg.apply_lookup_table"(%slice_L, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_M = "FHELinalg.apply_lookup_table"(%slice_M, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_N = "FHELinalg.apply_lookup_table"(%slice_N, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_O = "FHELinalg.apply_lookup_table"(%slice_O, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>
%part_P = "FHELinalg.apply_lookup_table"(%slice_P, %cst_1) : (tensor<25x4x!FHE.eint<4>>, tensor<16xi64>) -> tensor<25x4x!FHE.eint<4>>

%res_A = tensor.insert_slice %part_A into %res [0, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_B = tensor.insert_slice %part_B into %res_A[25, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_C = tensor.insert_slice %part_C into %res_B[50, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_D = tensor.insert_slice %part_D into %res_C[75, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_E = tensor.insert_slice %part_E into %res_D[100, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_F = tensor.insert_slice %part_F into %res_E[125, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_G = tensor.insert_slice %part_G into %res_F[150, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_H = tensor.insert_slice %part_H into %res_G[175, 0][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_I = tensor.insert_slice %part_I into %res_H[0, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_J = tensor.insert_slice %part_J into %res_I[25, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_K = tensor.insert_slice %part_K into %res_J[50, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_L = tensor.insert_slice %part_L into %res_K[75, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_M = tensor.insert_slice %part_M into %res_L[100, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_N = tensor.insert_slice %part_N into %res_M[125, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_O = tensor.insert_slice %part_O into %res_N[150, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>
%res_P = tensor.insert_slice %part_P into %res_O[175, 4][25, 4][1, 1] : tensor<25x4x!FHE.eint<4>> into tensor<200x8x!FHE.eint<4>>

return %res_P : tensor<200x8x!FHE.eint<4>>
}
)XXX",
"main", false, true, true, DEFAULT_batchTFHEOps,
DEFAULT_global_p_error, DEFAULT_chunkedIntegers, DEFAULT_chunkSize,
DEFAULT_chunkWidth, false);

const size_t dim0 = 200;
const size_t dim1 = 4;
const size_t dim2 = 8;
const std::vector<size_t> inputShape({dim0, dim1});
const std::vector<size_t> outputShape({dim0, dim2});
std::vector<uint64_t> values;
values.reserve(dim0 * dim1);
for (size_t i = 0; i < dim0 * dim1; ++i) {
values.push_back(i % 17 % 4);
}
auto input = Tensor<uint64_t>(values, inputShape);

if (mlir::concretelang::dfr::_dfr_is_root_node()) {
auto maybeResult = lambda.call({input});
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
auto result = maybeResult.value()[0].template getTensor<uint64_t>().value();
ASSERT_EQ(result.dimensions, outputShape);
distributed_results = result.values;
} else {
ASSERT_OUTCOME_HAS_VALUE(lambda.call({}));
}
}

TEST(Distributed, nn_med_sequential) {
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
checkedJit(lambda, R"XXX(
func.func @main(%arg0: tensor<200x4x!FHE.eint<4>>) -> tensor<200x8x!FHE.eint<4>> {
%cst = arith.constant dense<"0x01010100010100000001010101000101010101010101010001000101000001010001010100000101000001000001010001000001010100010001000000010100010001010001000001000101010101000100010001000000000100010001000101000001000101010100010001000000000101000100000000000001000100000100000100000001010000010001000101000100010001000100000100000100010101010000000000000000010001010000000100000100010100000100000000010001000101000100000000000101010101000101010101010100010100010100000000000101010100000100010100000001000101000000010101000101000100000101010100010101010000010101010100010000000000000001010101000100010101000001010001010000010001010101000000000000000001000001000000010100000100000101010100010001000000000000010100010101000000010100000100010001010001000000000100010001000101010100010100000001010100010101010100010100010001000001000000000101000101010001000100000101010100000101010100000100010101000100000101000101010100010001000101010100010001010001010000010000010001010000000001000101010001000000000101000000010000010100010001000001000001010101000100010001010100000101000000010001000000000101000101000000010000000001000101010100010001000000000001010000010001000001010101000101010101010100000000000001000100000100000001000000010101010101000000000101010101000100000101000100000000000001000100000101000101010100010000000101000000000100000100000101010000010100000000010000000000010001000100000101010001010101000000000000010000010101010001000000010001010001010000000000000101000000010101010101000001010101000001000001010100000000010001010100000100000101000101010100010001010001000001000100000101000100010100000100010000000101000000010000010001010101010000000101000000010101000001010100000100010001000000000001010000000100010000000000000000000000000001010101010101010101000001010101000001010100000001000101010101010000010101000101010100010101010000010101010100000100000000000101010000000000010101010000000001000000010100000100000001000101010000000001000001000001010001010000010001000101010001010001010101000100010000000100000100010101000000000101010101010001000100000000000101010000010101000001010001010000000001010100000101000001010000000001010101000100010000010101000000000001000101000001010101000101000001000001000000010100010001000101010100010001010000000101000000010001000001000100000101010001000001000001000101010000010001000001000101000000000000000101010000010000000101010100010100010001010101010000000000010001000101010000000001010100000000010001010100010001000001000101000000010100010000010000010001010100010000010001010100010000010100010101010001000100010100010101000100000101010100000100010100000100000000010101000000010001000001010000000101000100000100010101000000010100000101000001010001010100010000000101010000000001010001000000010100010101010001000100010001000001010101000000010001000100000100010101000000000000010100010000000100000000010100010000000100000101010000010101000100010000010100000001000100000000000100000001010101010101000100010001000000010101010100000001000001000001010001000101010100000001010001010100010101000101000000010001010100010101000100000101000101000001000001000001000101010100010001010000000100000101010100000001000000000000010101000100010001000001000001000000000000010100000100000001"> : tensor<200x8xi5>
%cst_0 = arith.constant dense<[[1, 0, 0, 0, 1, 0, 0, 1], [0, 0, 1, 1, 0, 0, 0, 0], [1, 1, 0, 1, 1, 0, 1, 1], [1, 1, 0, 0, 1, 0, 1, 1]]> : tensor<4x8xi5>
%0 = "FHELinalg.matmul_eint_int"(%arg0, %cst_0) : (tensor<200x4x!FHE.eint<4>>, tensor<4x8xi5>) -> tensor<200x8x!FHE.eint<4>>
%1 = "FHELinalg.add_eint_int"(%0, %cst) : (tensor<200x8x!FHE.eint<4>>, tensor<200x8xi5>) -> tensor<200x8x!FHE.eint<4>>
%cst_1 = arith.constant dense<[0, 3, 7, 10, 14, 17, 21, 24, 28, 31, 35, 38, 42, 45, 49, 52]> : tensor<16xi64>
%2 = "FHELinalg.apply_lookup_table"(%1, %cst_1) : (tensor<200x8x!FHE.eint<4>>, tensor<16xi64>) -> tensor<200x8x!FHE.eint<4>>
return %2 : tensor<200x8x!FHE.eint<4>>
}
)XXX",
"main", false, false, false, DEFAULT_batchTFHEOps,
DEFAULT_global_p_error, DEFAULT_chunkedIntegers,
DEFAULT_chunkSize, DEFAULT_chunkWidth, false);

const size_t dim0 = 200;
const size_t dim1 = 4;
const size_t dim2 = 8;
const std::vector<size_t> inputShape({dim0, dim1});
const std::vector<size_t> outputShape({dim0, dim2});
std::vector<uint64_t> values;
values.reserve(dim0 * dim1);
for (size_t i = 0; i < dim0 * dim1; ++i) {
values.push_back(i % 17 % 4);
}
auto input = Tensor<uint64_t>(values, inputShape);

if (mlir::concretelang::dfr::_dfr_is_root_node()) {
auto maybeResult = lambda.call({input});
ASSERT_OUTCOME_HAS_VALUE(maybeResult);
auto result =
maybeResult.value()[0].template getTensor<uint64_t>().value();
for (size_t i = 0; i < dim0 * dim2; i++)
EXPECT_EQ(distributed_results[i], result.values[i])
<< "result differ at pos " << i;
}
}
}
@@ -1,23 +0,0 @@
#!/bin/bash
#SBATCH --job-name=end_to_end_jit_distributed
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=antoniu.pop@zama.ai
#SBATCH --nodes=4
#SBATCH --cpus-per-task=8
#SBATCH --time=00:45:00
#SBATCH --output=end_to_end_jit_distributed_%j.log

echo "Date = $(date)"
echo "Hostname = $(hostname -s)"
echo "Working Directory = $(pwd)"
echo ""
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
echo "Number of Tasks Allocated = $SLURM_NTASKS"
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"

export OMP_NUM_THREADS=8
export DFR_NUM_THREADS=2

srun ./build/bin/end_to_end_jit_distributed

date
@@ -407,8 +407,10 @@ func.func @main(%arg0: !FHE.eint<3>) -> !FHE.eint<3> {
}
)XXX");
ASSERT_OUTCOME_HAS_FAILURE_WITH_ERRORMSG(
err, "Program can not be composed: Dag is not composable, because of "
"output 1: Partition 0 has input coefficient 4");
err, "Program can not be composed: At -:4:8: please add "
"`fhe.refresh(...)` to guarantee the function composability.\n"
"The noise of the node 0 is contaminated by noise coming straight "
"from the input (partition: 0, coeff: 4.00).");
}

TEST(CompileNotComposable, not_composable_2) {
@@ -428,8 +430,10 @@ func.func @main(%arg0: !FHE.eint<3>) -> (!FHE.eint<3>, !FHE.eint<3>) {
}
)XXX");
ASSERT_OUTCOME_HAS_FAILURE_WITH_ERRORMSG(
err, "Program can not be composed: Dag is not composable, because of "
"output 1: Partition 0 has input coefficient 4");
err, "Program can not be composed: At -:5:8: please add "
"`fhe.refresh(...)` to guarantee the function composability.\n"
"The noise of the node 0 is contaminated by noise coming straight "
"from the input (partition: 0, coeff: 4.00).");
}

TEST(CompileComposable, composable_supported_v0) {

@@ -6,7 +6,6 @@
#include <type_traits>

#include "concretelang/Common/Values.h"
#include "concretelang/Runtime/DFRuntime.hpp"
#include "concretelang/Support/CompilationFeedback.h"
#include "concretelang/TestLib/TestProgram.h"
#include "end_to_end_fixture/EndToEndFixture.h"
@@ -59,18 +58,10 @@ public:
void testOnce() {
for (auto tests_rep = 0; tests_rep <= options.numberOfRetry; tests_rep++) {
// We execute the circuit.
auto maybeRes =
testCircuit->call((mlir::concretelang::dfr::_dfr_is_root_node())
? args
: std::vector<Value>());
if (!mlir::concretelang::dfr::_dfr_is_root_node())
return;
auto maybeRes = testCircuit->call(args);
ASSERT_OUTCOME_HAS_VALUE(maybeRes);
auto result = maybeRes.value();

if (!mlir::concretelang::dfr::_dfr_is_root_node())
return;

/* Check results */
bool allgood = true;
for (size_t i = 0; i < desc.outputs.size(); i++) {

@@ -36,16 +36,16 @@ Result<TestProgram> setupTestProgram(std::string source,
auto outputs = circuitEncoding.asBuilder().initOutputs(1);
circuitEncoding.asBuilder().setName(funcname);

auto encodingInfo = Message<concreteprotocol::EncodingInfo>().asBuilder();
encodingInfo.initShape();
auto integer = encodingInfo.getEncoding().initIntegerCiphertext();
auto encodingInfo = Message<concreteprotocol::EncodingInfo>();
encodingInfo.asBuilder().initShape();
auto integer = encodingInfo.asBuilder().getEncoding().initIntegerCiphertext();
integer.getMode().initNative();
integer.setWidth(3);
integer.setIsSigned(false);

inputs.setWithCaveats(0, encodingInfo);
inputs.setWithCaveats(1, encodingInfo);
outputs.setWithCaveats(0, encodingInfo);
inputs.setWithCaveats(0, encodingInfo.asReader());
inputs.setWithCaveats(1, encodingInfo.asReader());
outputs.setWithCaveats(0, encodingInfo.asReader());

options.encodings = Message<concreteprotocol::ProgramEncodingInfo>();
options.encodings->asBuilder().initCircuits(1).setWithCaveats(
@@ -61,14 +61,14 @@ Result<TestProgram> setupTestProgram(std::string source,
TEST(Encodings_unit_tests, multi_key) {
std::string source = R"(
func.func @main(
%arg0: !TFHE.glwe<sk<1,1,2048>>,
%arg0: !TFHE.glwe<sk<1,1,2048>>,
%arg1: !TFHE.glwe<sk<2,1,2048>>
) -> !TFHE.glwe<sk<2,1,2048>> {

%0 = "TFHE.keyswitch_glwe"(%arg0) {key=#TFHE.ksk<sk<1,1,2048>, sk<2, 1,2048>, 7, 2>} : (!TFHE.glwe<sk<1, 1, 2048>>) -> !TFHE.glwe<sk<2, 1, 2048>>
%1 = "TFHE.add_glwe"(%arg1, %0) : (!TFHE.glwe<sk<2,1,2048>>, !TFHE.glwe<sk<2,1,2048>>) -> !TFHE.glwe<sk<2,1,2048>>
return %1 : !TFHE.glwe<sk<2,1,2048>>


}
)";
ASSERT_ASSIGN_OUTCOME_VALUE(circuit, setupTestProgram(source));

@@ -1,3 +1,6 @@
#![allow(clippy::boxed_local)]
#![allow(clippy::too_many_arguments)]

use concrete_optimizer::computing_cost::cpu::CpuComplexity;
use concrete_optimizer::config;
use concrete_optimizer::config::ProcessingUnit;
@@ -42,7 +45,7 @@ fn caches_from(options: ffi::Options) -> decomposition::PersistDecompCaches {
decomposition::cache(
options.security_level,
processing_unit,
Some(ProcessingUnit::Cpu.complexity_model()),
Some(processing_unit.complexity_model()),
options.cache_on_disk,
options.ciphertext_modulus_log,
options.fft_precision,
@@ -612,12 +615,19 @@ impl Dag {
pub struct DagBuilder<'dag>(unparametrized::DagBuilder<'dag>);

impl<'dag> DagBuilder<'dag> {
fn add_input(&mut self, out_precision: Precision, out_shape: &[u64]) -> ffi::OperatorIndex {
fn add_input(
&mut self,
out_precision: Precision,
out_shape: &[u64],
location: &Location,
) -> ffi::OperatorIndex {
let out_shape = Shape {
dimensions_size: out_shape.to_owned(),
};

self.0.add_input(out_precision, out_shape).into()
self.0
.add_input(out_precision, out_shape, location.0.clone())
.into()
}

fn add_lut(
@@ -625,12 +635,15 @@ impl<'dag> DagBuilder<'dag> {
input: ffi::OperatorIndex,
table: &[u64],
out_precision: Precision,
location: &Location,
) -> ffi::OperatorIndex {
let table = FunctionTable {
values: table.to_owned(),
};

self.0.add_lut(input.into(), table, out_precision).into()
self.0
.add_lut(input.into(), table, out_precision, location.0.clone())
.into()
}

#[allow(clippy::boxed_local)]
@@ -638,10 +651,11 @@ impl<'dag> DagBuilder<'dag> {
&mut self,
inputs: &[ffi::OperatorIndex],
weights: Box<Weights>,
location: &Location,
) -> ffi::OperatorIndex {
let inputs: Vec<OperatorIndex> = inputs.iter().copied().map(Into::into).collect();

self.0.add_dot(inputs, weights.0).into()
self.0.add_dot(inputs, weights.0, location.0.clone()).into()
}

fn add_levelled_op(
@@ -649,10 +663,12 @@ impl<'dag> DagBuilder<'dag> {
inputs: &[ffi::OperatorIndex],
lwe_dim_cost_factor: f64,
fixed_cost: f64,
manp: f64,
weights: &[f64],
out_shape: &[u64],
comment: &str,
location: &Location,
) -> ffi::OperatorIndex {
debug_assert!(weights.len() == inputs.len());
let inputs: Vec<OperatorIndex> = inputs.iter().copied().map(Into::into).collect();

let out_shape = Shape {
@@ -665,7 +681,14 @@ impl<'dag> DagBuilder<'dag> {
};

self.0
.add_levelled_op(inputs, complexity, manp, out_shape, comment)
.add_levelled_op(
inputs,
complexity,
weights,
out_shape,
comment,
location.0.clone(),
)
.into()
}

@@ -673,16 +696,22 @@ impl<'dag> DagBuilder<'dag> {
&mut self,
input: ffi::OperatorIndex,
rounded_precision: Precision,
location: &Location,
) -> ffi::OperatorIndex {
self.0.add_round_op(input.into(), rounded_precision).into()
self.0
.add_round_op(input.into(), rounded_precision, location.0.clone())
.into()
}

fn add_unsafe_cast_op(
&mut self,
input: ffi::OperatorIndex,
new_precision: Precision,
location: &Location,
) -> ffi::OperatorIndex {
self.0.add_unsafe_cast(input.into(), new_precision).into()
self.0
.add_unsafe_cast(input.into(), new_precision, location.0.clone())
.into()
}

fn tag_operator_as_output(&mut self, op: ffi::OperatorIndex) {
@@ -694,6 +723,30 @@ impl<'dag> DagBuilder<'dag> {
}
}

#[derive(Clone)]
pub struct Location(operator::Location);

fn location_unknown() -> Box<Location> {
Box::new(Location(operator::Location::Unknown))
}

fn location_from_string(string: &str) -> Box<Location> {
let location: Vec<&str> = string.split(':').collect();
match location[..] {
[file] => Box::new(Location(operator::Location::File(file.into()))),
[file, line] => Box::new(Location(operator::Location::Line(
file.into(),
line.parse().unwrap(),
))),
[file, line, column] => Box::new(Location(operator::Location::LineColumn(
file.into(),
line.parse().unwrap(),
column.parse().unwrap(),
))),
_ => Box::new(Location(operator::Location::Unknown)),
}
}

pub struct Weights(operator::Weights);

fn vector(weights: &[i64]) -> Box<Weights> {
@@ -748,6 +801,14 @@ mod ffi {

type DagBuilder<'dag>;

type Location;

#[namespace = "concrete_optimizer::utils"]
fn location_unknown() -> Box<Location>;

#[namespace = "concrete_optimizer::utils"]
fn location_from_string(string: &str) -> Box<Location>;

#[namespace = "concrete_optimizer::dag"]
fn empty() -> Box<Dag>;

@@ -761,6 +822,7 @@ mod ffi {
self: &mut DagBuilder<'_>,
out_precision: u8,
out_shape: &[u64],
location: &Location,
) -> OperatorIndex;

unsafe fn add_lut(
@@ -768,12 +830,14 @@ mod ffi {
input: OperatorIndex,
table: &[u64],
out_precision: u8,
location: &Location,
) -> OperatorIndex;

unsafe fn add_dot(
self: &mut DagBuilder<'_>,
inputs: &[OperatorIndex],
weights: Box<Weights>,
location: &Location,
) -> OperatorIndex;

unsafe fn add_levelled_op(
@@ -781,21 +845,24 @@ mod ffi {
inputs: &[OperatorIndex],
lwe_dim_cost_factor: f64,
fixed_cost: f64,
manp: f64,
weights: &[f64],
out_shape: &[u64],
comment: &str,
location: &Location,
) -> OperatorIndex;

unsafe fn add_round_op(
self: &mut DagBuilder<'_>,
input: OperatorIndex,
rounded_precision: u8,
location: &Location,
) -> OperatorIndex;

unsafe fn add_unsafe_cast_op(
self: &mut DagBuilder<'_>,
input: OperatorIndex,
rounded_precision: u8,
location: &Location,
) -> OperatorIndex;

unsafe fn tag_operator_as_output(self: &mut DagBuilder<'_>, op: OperatorIndex);

@@ -943,6 +943,7 @@ struct CircuitKeys;
|
||||
namespace concrete_optimizer {
|
||||
struct Dag;
|
||||
struct DagBuilder;
|
||||
struct Location;
|
||||
struct Weights;
|
||||
enum class Encoding : ::std::uint8_t;
|
||||
enum class MultiParamStrategy : ::std::uint8_t;
|
||||
@@ -993,12 +994,12 @@ private:
|
||||
#define CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
|
||||
struct DagBuilder final : public ::rust::Opaque {
|
||||
::rust::String dump() const noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
void tag_operator_as_output(::concrete_optimizer::dag::OperatorIndex op) noexcept;
|
||||
~DagBuilder() = delete;
|
||||
|
||||
@@ -1011,6 +1012,20 @@ private:
|
||||
};
|
||||
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
|
||||
|
||||
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Location
|
||||
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Location
|
||||
struct Location final : public ::rust::Opaque {
|
||||
~Location() = delete;
|
||||
|
||||
private:
|
||||
friend ::rust::layout;
|
||||
struct layout {
|
||||
static ::std::size_t size() noexcept;
|
||||
static ::std::size_t align() noexcept;
|
||||
};
|
||||
};
|
||||
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$Location
|
||||
|
||||
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
|
||||
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
|
||||
struct Weights final : public ::rust::Opaque {
|
||||
@@ -1288,8 +1303,18 @@ extern "C" {
|
||||
::std::size_t concrete_optimizer$cxxbridge1$Dag$operator$alignof() noexcept;
|
||||
::std::size_t concrete_optimizer$cxxbridge1$DagBuilder$operator$sizeof() noexcept;
|
||||
::std::size_t concrete_optimizer$cxxbridge1$DagBuilder$operator$alignof() noexcept;
|
||||
::std::size_t concrete_optimizer$cxxbridge1$Location$operator$sizeof() noexcept;
|
||||
::std::size_t concrete_optimizer$cxxbridge1$Location$operator$alignof() noexcept;
|
||||
} // extern "C"
|
||||
|
||||
namespace utils {
|
||||
extern "C" {
|
||||
::concrete_optimizer::Location *concrete_optimizer$utils$cxxbridge1$location_unknown() noexcept;
|
||||
|
||||
::concrete_optimizer::Location *concrete_optimizer$utils$cxxbridge1$location_from_string(::rust::Str string) noexcept;
|
||||
} // extern "C"
|
||||
} // namespace utils
|
||||
|
||||
namespace dag {
|
||||
extern "C" {
|
||||
::concrete_optimizer::Dag *concrete_optimizer$dag$cxxbridge1$empty() noexcept;
|
||||
@@ -1303,17 +1328,17 @@ void concrete_optimizer$cxxbridge1$Dag$dump(::concrete_optimizer::Dag const &sel
|
||||
|
||||
void concrete_optimizer$cxxbridge1$DagBuilder$dump(::concrete_optimizer::DagBuilder const &self, ::rust::String *return$) noexcept;
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_input(::concrete_optimizer::DagBuilder &self, ::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_input(::concrete_optimizer::DagBuilder &self, ::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept;
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_lut(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_lut(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_dot(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::concrete_optimizer::Weights *weights) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_dot(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::concrete_optimizer::Weights *weights, ::concrete_optimizer::Location const &location) noexcept;
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(::concrete_optimizer::DagBuilder &self, ::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept;
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
|
||||
void concrete_optimizer$cxxbridge1$DagBuilder$tag_operator_as_output(::concrete_optimizer::DagBuilder &self, ::concrete_optimizer::dag::OperatorIndex op) noexcept;
|
||||
|
||||
@@ -1393,6 +1418,24 @@ namespace utils {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$operator$alignof();
|
||||
}
|
||||
|
||||
::std::size_t Location::layout::size() noexcept {
|
||||
return concrete_optimizer$cxxbridge1$Location$operator$sizeof();
|
||||
}
|
||||
|
||||
::std::size_t Location::layout::align() noexcept {
|
||||
return concrete_optimizer$cxxbridge1$Location$operator$alignof();
|
||||
}
|
||||
|
||||
namespace utils {
|
||||
::rust::Box<::concrete_optimizer::Location> location_unknown() noexcept {
|
||||
return ::rust::Box<::concrete_optimizer::Location>::from_raw(concrete_optimizer$utils$cxxbridge1$location_unknown());
|
||||
}
|
||||
|
||||
::rust::Box<::concrete_optimizer::Location> location_from_string(::rust::Str string) noexcept {
|
||||
return ::rust::Box<::concrete_optimizer::Location>::from_raw(concrete_optimizer$utils$cxxbridge1$location_from_string(string));
|
||||
}
|
||||
} // namespace utils
|
||||
|
||||
namespace dag {
|
||||
::rust::Box<::concrete_optimizer::Dag> empty() noexcept {
|
||||
return ::rust::Box<::concrete_optimizer::Dag>::from_raw(concrete_optimizer$dag$cxxbridge1$empty());
|
||||
@@ -1415,28 +1458,28 @@ namespace dag {
|
||||
return ::std::move(return$.value);
|
||||
}
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_input(*this, out_precision, out_shape);
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_input(*this, out_precision, out_shape, location);
|
||||
}
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_lut(*this, input, table, out_precision);
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_lut(*this, input, table, out_precision, location);
|
||||
}
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_dot(*this, inputs, weights.into_raw());
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights, ::concrete_optimizer::Location const &location) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_dot(*this, inputs, weights.into_raw(), location);
|
||||
}
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(*this, inputs, lwe_dim_cost_factor, fixed_cost, manp, out_shape, comment);
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_levelled_op(*this, inputs, lwe_dim_cost_factor, fixed_cost, weights, out_shape, comment, location);
|
||||
}
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(*this, input, rounded_precision);
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_round_op(*this, input, rounded_precision, location);
|
||||
}
|
||||
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(*this, input, rounded_precision);
|
||||
::concrete_optimizer::dag::OperatorIndex DagBuilder::add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept {
|
||||
return concrete_optimizer$cxxbridge1$DagBuilder$add_unsafe_cast_op(*this, input, rounded_precision, location);
|
||||
}
|
||||
|
||||
void DagBuilder::tag_operator_as_output(::concrete_optimizer::dag::OperatorIndex op) noexcept {
|
||||
@@ -1517,6 +1560,10 @@ namespace weights {
|
||||
} // namespace concrete_optimizer
|
||||
|
||||
extern "C" {
|
||||
::concrete_optimizer::Location *cxxbridge1$box$concrete_optimizer$Location$alloc() noexcept;
|
||||
void cxxbridge1$box$concrete_optimizer$Location$dealloc(::concrete_optimizer::Location *) noexcept;
|
||||
void cxxbridge1$box$concrete_optimizer$Location$drop(::rust::Box<::concrete_optimizer::Location> *ptr) noexcept;
|
||||
|
||||
::concrete_optimizer::Dag *cxxbridge1$box$concrete_optimizer$Dag$alloc() noexcept;
|
||||
void cxxbridge1$box$concrete_optimizer$Dag$dealloc(::concrete_optimizer::Dag *) noexcept;
|
||||
void cxxbridge1$box$concrete_optimizer$Dag$drop(::rust::Box<::concrete_optimizer::Dag> *ptr) noexcept;
|
||||
@@ -1605,6 +1652,18 @@ void cxxbridge1$rust_vec$concrete_optimizer$dag$InstructionKeys$truncate(::rust:
|
||||
namespace rust {
|
||||
inline namespace cxxbridge1 {
|
||||
template <>
|
||||
::concrete_optimizer::Location *Box<::concrete_optimizer::Location>::allocation::alloc() noexcept {
|
||||
return cxxbridge1$box$concrete_optimizer$Location$alloc();
|
||||
}
|
||||
template <>
|
||||
void Box<::concrete_optimizer::Location>::allocation::dealloc(::concrete_optimizer::Location *ptr) noexcept {
|
||||
cxxbridge1$box$concrete_optimizer$Location$dealloc(ptr);
|
||||
}
|
||||
template <>
|
||||
void Box<::concrete_optimizer::Location>::drop() noexcept {
|
||||
cxxbridge1$box$concrete_optimizer$Location$drop(this);
|
||||
}
|
||||
template <>
|
||||
::concrete_optimizer::Dag *Box<::concrete_optimizer::Dag>::allocation::alloc() noexcept {
|
||||
return cxxbridge1$box$concrete_optimizer$Dag$alloc();
|
||||
}
|
||||
|
||||
@@ -924,6 +924,7 @@ struct CircuitKeys;
|
||||
namespace concrete_optimizer {
|
||||
struct Dag;
|
||||
struct DagBuilder;
|
||||
struct Location;
|
||||
struct Weights;
|
||||
enum class Encoding : ::std::uint8_t;
|
||||
enum class MultiParamStrategy : ::std::uint8_t;
|
||||
@@ -974,12 +975,12 @@ private:
|
||||
#define CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
|
||||
struct DagBuilder final : public ::rust::Opaque {
|
||||
::rust::String dump() const noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, double manp, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_input(::std::uint8_t out_precision, ::rust::Slice<::std::uint64_t const> out_shape, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_lut(::concrete_optimizer::dag::OperatorIndex input, ::rust::Slice<::std::uint64_t const> table, ::std::uint8_t out_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_dot(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, ::rust::Box<::concrete_optimizer::Weights> weights, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_levelled_op(::rust::Slice<::concrete_optimizer::dag::OperatorIndex const> inputs, double lwe_dim_cost_factor, double fixed_cost, ::rust::Slice<double const> weights, ::rust::Slice<::std::uint64_t const> out_shape, ::rust::Str comment, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_round_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
::concrete_optimizer::dag::OperatorIndex add_unsafe_cast_op(::concrete_optimizer::dag::OperatorIndex input, ::std::uint8_t rounded_precision, ::concrete_optimizer::Location const &location) noexcept;
|
||||
void tag_operator_as_output(::concrete_optimizer::dag::OperatorIndex op) noexcept;
|
||||
~DagBuilder() = delete;
|
||||
|
||||
@@ -992,6 +993,20 @@ private:
|
||||
};
|
||||
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$DagBuilder
|
||||
|
||||
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Location
|
||||
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Location
|
||||
struct Location final : public ::rust::Opaque {
|
||||
~Location() = delete;
|
||||
|
||||
private:
|
||||
friend ::rust::layout;
|
||||
struct layout {
|
||||
static ::std::size_t size() noexcept;
|
||||
static ::std::size_t align() noexcept;
|
||||
};
|
||||
};
|
||||
#endif // CXXBRIDGE1_STRUCT_concrete_optimizer$Location
|
||||
|
||||
#ifndef CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
|
||||
#define CXXBRIDGE1_STRUCT_concrete_optimizer$Weights
|
||||
struct Weights final : public ::rust::Opaque {
|
||||
@@ -1258,6 +1273,10 @@ namespace utils {
|
||||
::concrete_optimizer::dag::DagSolution convert_to_dag_solution(::concrete_optimizer::v0::Solution const &solution) noexcept;
|
||||
|
||||
::concrete_optimizer::dag::CircuitSolution convert_to_circuit_solution(::concrete_optimizer::dag::DagSolution const &solution, ::concrete_optimizer::Dag const &dag) noexcept;
|
||||
|
||||
::rust::Box<::concrete_optimizer::Location> location_unknown() noexcept;
|
||||
|
||||
::rust::Box<::concrete_optimizer::Location> location_from_string(::rust::Str string) noexcept;
|
||||
} // namespace utils
|
||||
|
||||
namespace dag {
|
||||
|
||||
@@ -51,7 +51,7 @@ TEST test_dag_no_lut() {
|
||||
std::vector<uint64_t> shape = {3};
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex node1 =
|
||||
builder->add_input(PRECISION_8B, slice(shape));
|
||||
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<concrete_optimizer::dag::OperatorIndex> inputs = {node1};
|
||||
|
||||
@@ -60,7 +60,7 @@ TEST test_dag_no_lut() {
|
||||
rust::cxxbridge1::Box<concrete_optimizer::Weights> weights =
|
||||
concrete_optimizer::weights::vector(slice(weight_vec));
|
||||
|
||||
auto id = builder->add_dot(slice(inputs), std::move(weights));
|
||||
auto id = builder->add_dot(slice(inputs), std::move(weights), *concrete_optimizer::utils::location_unknown());
|
||||
builder->tag_operator_as_output(id);
|
||||
|
||||
auto solution = dag->optimize(default_options());
|
||||
@@ -75,10 +75,10 @@ TEST test_dag_lut() {
|
||||
std::vector<uint64_t> shape = {3};
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input =
|
||||
builder->add_input(PRECISION_8B, slice(shape));
|
||||
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<u_int64_t> table = {};
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_8B);
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
|
||||
builder->tag_operator_as_output(id);
|
||||
|
||||
auto solution = dag->optimize(default_options());
|
||||
@@ -94,10 +94,10 @@ TEST test_dag_lut_wop() {
|
||||
std::vector<uint64_t> shape = {3};
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input =
|
||||
builder->add_input(PRECISION_16B, slice(shape));
|
||||
builder->add_input(PRECISION_16B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<u_int64_t> table = {};
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_16B);
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_16B, *concrete_optimizer::utils::location_unknown());
|
||||
builder->tag_operator_as_output(id);
|
||||
|
||||
auto solution = dag->optimize(default_options());
|
||||
@@ -113,10 +113,10 @@ TEST test_dag_lut_force_wop() {
|
||||
std::vector<uint64_t> shape = {3};
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input =
|
||||
builder->add_input(PRECISION_8B, slice(shape));
|
||||
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<u_int64_t> table = {};
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_8B);
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
|
||||
builder->tag_operator_as_output(id);
|
||||
|
||||
auto options = default_options();
|
||||
@@ -133,10 +133,10 @@ TEST test_multi_parameters_1_precision() {
|
||||
std::vector<uint64_t> shape = {3};
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input =
|
||||
builder->add_input(PRECISION_8B, slice(shape));
|
||||
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<u_int64_t> table = {};
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_8B);
|
||||
auto id = builder->add_lut(input, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
|
||||
builder->tag_operator_as_output(id);
|
||||
|
||||
auto options = default_options();
|
||||
@@ -160,14 +160,14 @@ TEST test_multi_parameters_2_precision() {
|
||||
std::vector<uint64_t> shape = {3};
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input1 =
|
||||
builder->add_input(PRECISION_8B, slice(shape));
|
||||
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input2 =
|
||||
builder->add_input(PRECISION_1B, slice(shape));
|
||||
builder->add_input(PRECISION_1B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<u_int64_t> table = {};
|
||||
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B);
|
||||
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B);
|
||||
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
|
||||
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<concrete_optimizer::dag::OperatorIndex> inputs = {lut1, lut2};
|
||||
|
||||
@@ -176,7 +176,7 @@ TEST test_multi_parameters_2_precision() {
|
||||
rust::cxxbridge1::Box<concrete_optimizer::Weights> weights =
|
||||
concrete_optimizer::weights::vector(slice(weight_vec));
|
||||
|
||||
auto id = builder->add_dot(slice(inputs), std::move(weights));
|
||||
auto id = builder->add_dot(slice(inputs), std::move(weights), *concrete_optimizer::utils::location_unknown());
|
||||
builder->tag_operator_as_output(id);
|
||||
|
||||
auto options = default_options();
|
||||
@@ -201,14 +201,14 @@ TEST test_multi_parameters_2_precision_crt() {
|
||||
std::vector<uint64_t> shape = {3};
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input1 =
|
||||
builder->add_input(PRECISION_8B, slice(shape));
|
||||
builder->add_input(PRECISION_8B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
concrete_optimizer::dag::OperatorIndex input2 =
|
||||
builder->add_input(PRECISION_1B, slice(shape));
|
||||
builder->add_input(PRECISION_1B, slice(shape), *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<u_int64_t> table = {};
|
||||
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B);
|
||||
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B);
|
||||
auto lut1 = builder->add_lut(input1, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
|
||||
auto lut2 = builder->add_lut(input2, slice(table), PRECISION_8B, *concrete_optimizer::utils::location_unknown());
|
||||
|
||||
std::vector<concrete_optimizer::dag::OperatorIndex> inputs = {lut1, lut2};
|
||||
|
||||
@@ -217,7 +217,7 @@ TEST test_multi_parameters_2_precision_crt() {
|
||||
rust::cxxbridge1::Box<concrete_optimizer::Weights> weights =
|
||||
concrete_optimizer::weights::vector(slice(weight_vec));
|
||||
|
||||
auto id = builder->add_dot(slice(inputs), std::move(weights));
|
||||
auto id = builder->add_dot(slice(inputs), std::move(weights), *concrete_optimizer::utils::location_unknown());
|
||||
builder->tag_operator_as_output(id);
|
||||
|
||||
auto options = default_options();
|
||||
|
||||
@@ -0,0 +1,28 @@
use std::{fmt::Display, path::PathBuf};

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Location {
Unknown,
File(PathBuf),
Line(PathBuf, usize),
LineColumn(PathBuf, usize, usize),
}

impl Display for Location {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Unknown => write!(f, "unknown location"),
Self::File(file) => write!(f, "{}", file.file_name().unwrap().to_str().unwrap()),
Self::Line(file, line) => {
write!(f, "{}:{line}", file.file_name().unwrap().to_str().unwrap())
}
Self::LineColumn(file, line, column) => {
write!(
f,
"{}:{line}:{column}",
file.file_name().unwrap().to_str().unwrap()
)
}
}
}
}
@@ -1,8 +1,10 @@
#![allow(clippy::module_inception)]
pub mod dot_kind;
pub mod location;
pub mod operator;
pub mod tensor;

pub use self::dot_kind::*;
pub use self::location::*;
pub use self::operator::*;
pub use self::tensor::*;

@@ -3,6 +3,7 @@ use std::iter::{empty, once};
use std::ops::Deref;

use crate::dag::operator::tensor::{ClearTensor, Shape};
use crate::optimization::dag::multi_parameters::partition_cut::ExternalPartition;

use super::DotKind;

@@ -89,7 +90,7 @@ pub enum Operator {
LevelledOp {
inputs: Vec<OperatorIndex>,
complexity: LevelledComplexity,
manp: f64,
weights: Vec<f64>,
out_shape: Shape,
comment: String,
},
@@ -104,6 +105,11 @@ pub enum Operator {
input: OperatorIndex,
out_precision: Precision,
},
ChangePartition {
input: OperatorIndex,
src_partition: Option<ExternalPartition>,
dst_partition: Option<ExternalPartition>,
},
}

impl Operator {
@@ -114,7 +120,8 @@ impl Operator {
Self::LevelledOp { inputs, .. } | Self::Dot { inputs, .. } => Box::new(inputs.iter()),
Self::UnsafeCast { input, .. }
| Self::Lut { input, .. }
| Self::Round { input, .. } => Box::new(once(input)),
| Self::Round { input, .. }
| Self::ChangePartition { input, .. } => Box::new(once(input)),
}
}
}
@@ -171,7 +178,7 @@ impl fmt::Display for Operator {
}
Self::LevelledOp {
inputs,
manp,
weights,
out_shape,
..
} => {
@@ -182,7 +189,7 @@ impl fmt::Display for Operator {
}
write!(f, "%{}", input.0)?;
}
write!(f, "] : manp={manp} x {out_shape:?}")?;
write!(f, "] : weights={weights:?}, out_shape={out_shape:?}")?;
}
Self::Round {
input,
@@ -190,6 +197,23 @@ impl fmt::Display for Operator {
} => {
write!(f, "ROUND[%{}] : u{out_precision}", input.0)?;
}
Self::ChangePartition {
input,
src_partition,
dst_partition,
} => {
write!(f, "CHANGE_PARTITION[%{}] : {{", input.0)?;
if let Some(partition) = src_partition {
write!(f, "src_partition: {}", partition.name)?;
}
if let Some(partition) = dst_partition {
if src_partition.is_some() {
write!(f, ", ")?;
}
write!(f, "dst_partition: {}", partition.name)?;
}
write!(f, "}}")?;
}
}
Ok(())
}
