mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-05 04:44:41 -05:00
feat(hpu): Add Hpu backend implementation
This backend abstracts communication with the HPU FPGA hardware.
It defines its own entities to prevent circular dependencies with
tfhe-rs.
Object lifetime is handled through an Arc<Mutex<T>> wrapper, which enforces
that all objects currently alive in the HPU hardware are also kept valid on the
host side.
It contains the second version of HPU instruction set (HIS_V2.0):
* DOp have following properties:
+ Template as first class citizen
+ Support of Immediate template
+ Direct parser and conversion between Asm/Hex
+ Replace deku (and its associated endianness limitation) by
+ bitfield_struct and manual parsing
* IOp have following properties:
+ Support various number of Destination
+ Support various number of Sources
+ Support various number of Immediate values
+ Support of multiple bitwidth (Not implemented yet in the Fpga
firmware)
Details can be found in `backends/tfhe-hpu-backend/Readme.md`
This commit is contained in:
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
@@ -6,6 +6,7 @@ self-hosted-runner:
|
||||
- large_windows_16_latest
|
||||
- large_ubuntu_16
|
||||
- large_ubuntu_16-22.04
|
||||
- v80-desktop
|
||||
# Configuration variables in array of strings defined in your repository or
|
||||
# organization. `null` means disabling configuration variables check.
|
||||
# Empty array means no configuration variable is allowed.
|
||||
|
||||
88
.github/workflows/benchmark_hpu_integer.yml
vendored
Normal file
88
.github/workflows/benchmark_hpu_integer.yml
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
# Run all integer benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
|
||||
name: Hpu Integer Benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
integer-benchmarks-hpu:
|
||||
name: Execute integer & erc20 benchmarks for HPU backend
|
||||
runs-on: v80-desktop
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
steps:
|
||||
# Needed as long as hw_regmap repository is private
|
||||
- name: Configure SSH
|
||||
uses: webfactory/ssh-agent@a6f90b1f127823b31d4d4a8d96047790581349bd # v0.9.1
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_integer_hpu
|
||||
make bench_hlapi_erc20_hpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
|
||||
--database tfhe_rs \
|
||||
--hardware "hpu_x1" \
|
||||
--backend hpu \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch "${REF_NAME}" \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--walk-subdirs
|
||||
env:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_benchmarks
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
5
.github/workflows/cargo_build.yml
vendored
5
.github/workflows/cargo_build.yml
vendored
@@ -94,5 +94,10 @@ jobs:
|
||||
run: |
|
||||
make build_tfhe_coverage
|
||||
|
||||
- name: Run Hpu pcc checks
|
||||
if: ${{ contains(matrix.os, 'ubuntu') }}
|
||||
run: |
|
||||
make pcc_hpu
|
||||
|
||||
# The wasm build check is a bit annoying to set-up here and is done during the tests in
|
||||
# aws_tfhe_tests.yml
|
||||
|
||||
4
.github/workflows/cargo_test_fft.yml
vendored
4
.github/workflows/cargo_test_fft.yml
vendored
@@ -51,7 +51,7 @@ jobs:
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
strategy:
|
||||
matrix:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
strategy:
|
||||
matrix:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
|
||||
4
.github/workflows/cargo_test_ntt.yml
vendored
4
.github/workflows/cargo_test_ntt.yml
vendored
@@ -51,7 +51,7 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
|
||||
73
.github/workflows/hpu_hlapi_tests.yml
vendored
Normal file
73
.github/workflows/hpu_hlapi_tests.yml
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
# Test the tfhe-rs high-level API on the HPU backend
|
||||
name: Cargo Test HLAPI HPU
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
permissions: { }
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
outputs:
|
||||
hpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.hpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
hpu:
|
||||
- tfhe/Cargo.toml
|
||||
- Makefile
|
||||
- backends/tfhe-hpu-backend/**
|
||||
- mockups/tfhe-hpu-mockup/**
|
||||
|
||||
cargo-tests-hpu:
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.hpu_test == 'true'
|
||||
runs-on: large_ubuntu_16
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
- name: Install Just
|
||||
run: |
|
||||
cargo install just
|
||||
|
||||
- name: Test HLAPI HPU
|
||||
run: |
|
||||
source setup_hpu.sh
|
||||
just -f mockups/tfhe-hpu-mockup/Justfile BUILD_PROFILE=release mockup &
|
||||
make HPU_CONFIG=sim test_high_level_api_hpu
|
||||
|
||||
105
.github/workflows/make_release_hpu.yml
vendored
Normal file
105
.github/workflows/make_release_hpu.yml
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
name: Publish HPU release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify_tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-hpu-backend
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-hpu-backend Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-hpu-backend --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-hpu-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-hpu-backend release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
2
.lfsconfig
Normal file
2
.lfsconfig
Normal file
@@ -0,0 +1,2 @@
|
||||
[lfs]
|
||||
fetchexclude = *
|
||||
@@ -9,10 +9,12 @@ members = [
|
||||
"tasks",
|
||||
"tfhe-csprng",
|
||||
"backends/tfhe-cuda-backend",
|
||||
"backends/tfhe-hpu-backend",
|
||||
"utils/tfhe-versionable",
|
||||
"utils/tfhe-versionable-derive",
|
||||
"utils/param_dedup",
|
||||
"tests",
|
||||
"mockups/tfhe-hpu-mockup",
|
||||
]
|
||||
|
||||
exclude = [
|
||||
|
||||
79
Makefile
79
Makefile
@@ -2,6 +2,7 @@ SHELL:=$(shell /usr/bin/env which bash)
|
||||
OS:=$(shell uname)
|
||||
RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
|
||||
CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
|
||||
CARGO_BUILD_JOBS=default
|
||||
CPU_COUNT=$(shell ./scripts/cpu_count.sh)
|
||||
RS_BUILD_TOOLCHAIN:=stable
|
||||
CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
|
||||
@@ -55,6 +56,9 @@ REGEX_PATTERN?=''
|
||||
TFHECUDA_SRC=backends/tfhe-cuda-backend/cuda
|
||||
TFHECUDA_BUILD=$(TFHECUDA_SRC)/build
|
||||
|
||||
# tfhe-hpu-backend
|
||||
HPU_CONFIG=v80
|
||||
|
||||
# Exclude these files from coverage reports
|
||||
define COVERAGE_EXCLUDED_FILES
|
||||
--exclude-files apps/trivium/src/trivium/* \
|
||||
@@ -301,6 +305,13 @@ check_gpu: install_rs_check_toolchain
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: clippy_hpu # Run clippy lints on tfhe with "hpu" enabled
|
||||
clippy_hpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,hpu,pbs-stats,extended-types \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
|
||||
fix_newline: check_linelint_installed
|
||||
linelint -a .
|
||||
@@ -473,6 +484,11 @@ clippy_cuda_backend: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-cuda-backend -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_hpu_backend # Run clippy lints on the tfhe-hpu-backend
|
||||
clippy_hpu_backend: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-hpu-backend -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_rust_bindings_did_not_change # Check rust bindings are up to date for tfhe-cuda-backend
|
||||
check_rust_bindings_did_not_change:
|
||||
cargo build -p tfhe-cuda-backend && "$(MAKE)" fmt_gpu && \
|
||||
@@ -702,6 +718,28 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_hpu_ci # Run the tests for integer ci on hpu backend
|
||||
test_integer_hpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
cargo test --release -p $(TFHE_SPEC) --features hpu-v80 --test hpu
|
||||
|
||||
.PHONY: test_integer_hpu_mockup_ci # Run the tests for integer ci on hpu backend and mockup
|
||||
test_integer_hpu_mockup_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
source ./setup_hpu.sh --config sim ; \
|
||||
cargo build --release --bin hpu_mockup; \
|
||||
coproc target/release/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_pfail64_psi64.toml > mockup.log; \
|
||||
HPU_TEST_ITER=1 \
|
||||
cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
|
||||
kill %1
|
||||
|
||||
.PHONY: test_integer_hpu_mockup_ci_fast # Run the quick tests for integer ci on hpu backend and mockup.
|
||||
test_integer_hpu_mockup_ci_fast: install_rs_check_toolchain install_cargo_nextest
|
||||
source ./setup_hpu.sh --config sim ; \
|
||||
cargo build --profile devo --bin hpu_mockup; \
|
||||
coproc target/devo/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_fast.toml > mockup.log; \
|
||||
HPU_TEST_ITER=1 \
|
||||
cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
|
||||
kill %1
|
||||
|
||||
.PHONY: test_boolean # Run the tests of the boolean module
|
||||
test_boolean: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -857,6 +895,22 @@ test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
|
||||
ifeq ($(HPU_CONFIG), v80)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--build-jobs=$(CARGO_BUILD_JOBS) \
|
||||
--test-threads=1 \
|
||||
--features=integer,internal-keycache,hpu,hpu-v80 -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*hpu.*/)"
|
||||
else
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--build-jobs=$(CARGO_BUILD_JOBS) \
|
||||
--test-threads=1 \
|
||||
--features=integer,internal-keycache,hpu -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*hpu.*/)"
|
||||
endif
|
||||
|
||||
|
||||
.PHONY: test_strings # Run the tests for strings ci
|
||||
test_strings: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -1100,6 +1154,12 @@ clippy_bench_gpu: install_rs_check_toolchain
|
||||
--features=gpu,shortint,integer,internal-keycache,nightly-avx512,pbs-stats,zk-pok \
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_bench_hpu # Run clippy lints on tfhe-benchmark
|
||||
clippy_bench_hpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=hpu,shortint,integer,internal-keycache,pbs-stats\
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
|
||||
.PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
|
||||
print_doc_bench_parameters:
|
||||
RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
|
||||
@@ -1133,6 +1193,14 @@ bench_signed_integer_gpu: install_rs_check_toolchain
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend
|
||||
bench_integer_hpu: install_rs_check_toolchain
|
||||
source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
@@ -1324,6 +1392,14 @@ bench_hlapi_dex_gpu: install_rs_check_toolchain
|
||||
--bench hlapi-dex \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ERC20 operations on HPU
|
||||
bench_hlapi_erc20_hpu: install_rs_check_toolchain
|
||||
source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick
|
||||
|
||||
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
|
||||
bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
@@ -1423,6 +1499,9 @@ tfhe_lints
|
||||
pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
|
||||
clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu
|
||||
|
||||
.PHONY: pcc_hpu # pcc stands for pre commit checks for HPU compilation
|
||||
pcc_hpu: clippy_hpu clippy_hpu_backend test_integer_hpu_mockup_ci_fast
|
||||
|
||||
.PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
|
||||
fpcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
|
||||
check_md_docs_are_tested clippy_fast check_compile_tests
|
||||
|
||||
@@ -11,11 +11,13 @@ extend-ignore-identifiers-re = [
|
||||
# Example with string replacing "hello" with "herlo"
|
||||
"herlo",
|
||||
# Example in trivium
|
||||
"C9217BA0D762ACA1"
|
||||
"C9217BA0D762ACA1",
|
||||
"0x[0-9a-fA-F]+"
|
||||
]
|
||||
|
||||
[files]
|
||||
extend-exclude = [
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu",
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu",
|
||||
"backends/tfhe-hpu-backend/config_store/**/*.link_summary",
|
||||
]
|
||||
|
||||
3
backends/tfhe-hpu-backend/.gitattributes
vendored
Normal file
3
backends/tfhe-hpu-backend/.gitattributes
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*.xclbin filter=lfs diff=lfs merge=lfs -text
|
||||
*.pdi filter=lfs diff=lfs merge=lfs -text
|
||||
python/lib/example.json filter=lfs diff=lfs merge=lfs -text
|
||||
3
backends/tfhe-hpu-backend/.gitignore
vendored
Normal file
3
backends/tfhe-hpu-backend/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
ngt_*
|
||||
config
|
||||
kogge_cfg.toml
|
||||
88
backends/tfhe-hpu-backend/Cargo.toml
Normal file
88
backends/tfhe-hpu-backend/Cargo.toml
Normal file
@@ -0,0 +1,88 @@
|
||||
[package]
|
||||
name = "tfhe-hpu-backend"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
license = "BSD-3-Clause-Clear"
|
||||
description = "HPU implementation on FPGA of TFHE-rs primitives."
|
||||
homepage = "https://www.zama.ai/"
|
||||
documentation = "https://docs.zama.ai/tfhe-rs"
|
||||
repository = "https://github.com/zama-ai/tfhe-rs"
|
||||
readme = "README.md"
|
||||
keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography", "hardware", "fpga"]
|
||||
|
||||
[features]
|
||||
hw-xrt = []
|
||||
hw-v80 = []
|
||||
io-dump = ["num-traits"]
|
||||
rtl_graph = ["dot2"]
|
||||
utils = ["clap", "clap-num", "bitvec", "serde_json"]
|
||||
|
||||
[build-dependencies]
|
||||
cxx-build = "1.0"
|
||||
|
||||
[dependencies]
|
||||
cxx = "1.0"
|
||||
hw_regmap = "0.1.0"
|
||||
|
||||
strum = { version = "0.26.2", features = ["derive"] }
|
||||
strum_macros = "0.26.2"
|
||||
enum_dispatch = "0.3.13"
|
||||
tracing = "0.1.40"
|
||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
toml = { version = "0.8.*", features = [] }
|
||||
paste = "1.0.15"
|
||||
thiserror = "1.0.61"
|
||||
bytemuck = "1.16.0"
|
||||
anyhow = "1.0.82"
|
||||
lazy_static = "1.4.0"
|
||||
rand = "0.8.5"
|
||||
regex = "1.10.4"
|
||||
bitflags = { version = "2.5.0", features = ["serde"] }
|
||||
itertools = "0.11.0"
|
||||
lru = "0.12.3"
|
||||
bitfield-struct = "0.10.0"
|
||||
crossbeam = { version = "0.8.4", features = ["crossbeam-queue"] }
|
||||
rayon = { workspace = true }
|
||||
|
||||
# Dependencies used for Sim feature
|
||||
ipc-channel = "0.18.3"
|
||||
|
||||
# Dependencies used for debug feature
|
||||
num-traits = { version = "*", optional = true }
|
||||
clap = { version = "4.4.4", features = ["derive"], optional = true }
|
||||
clap-num = { version = "1.1.1", optional = true }
|
||||
nix = { version = "0.29.0", features = ["ioctl", "uio"] }
|
||||
|
||||
# Dependencies used for rtl_graph features
|
||||
dot2 = { version = "*", optional = true }
|
||||
|
||||
bitvec = { version = "*", optional = true }
|
||||
serde_json = { version = "*", optional = true }
|
||||
|
||||
# Binary for manual debugging
|
||||
# Enables access to HPU registers and driving custom sequences by hand
|
||||
[[bin]]
|
||||
name = "hputil"
|
||||
path = "src/utils/hputil.rs"
|
||||
required-features = ["utils"]
|
||||
|
||||
# Binary for asm manipulation
|
||||
# Enable to convert back and forth between asm/hex format
|
||||
[[bin]]
|
||||
name = "dop_fmt"
|
||||
path = "src/utils/dop_fmt.rs"
|
||||
required-features = ["utils"]
|
||||
|
||||
# Enable to convert back and forth between asm/hex format
|
||||
[[bin]]
|
||||
name = "iop_fmt"
|
||||
path = "src/utils/iop_fmt.rs"
|
||||
required-features = ["utils"]
|
||||
|
||||
# Firmware generation
|
||||
# Enables expanding an IOp into its list of DOps for inspection
|
||||
[[bin]]
|
||||
name = "fw"
|
||||
path = "src/utils/fw.rs"
|
||||
required-features = ["utils"]
|
||||
28
backends/tfhe-hpu-backend/LICENSE
Normal file
28
backends/tfhe-hpu-backend/LICENSE
Normal file
@@ -0,0 +1,28 @@
|
||||
BSD 3-Clause Clear License
|
||||
|
||||
Copyright © 2025 ZAMA.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or other
|
||||
materials provided with the distribution.
|
||||
|
||||
3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
|
||||
or promote products derived from this software without specific prior written permission.
|
||||
|
||||
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
|
||||
THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
||||
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
261
backends/tfhe-hpu-backend/Readme.md
Normal file
261
backends/tfhe-hpu-backend/Readme.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# TFHE-hpu-backend
|
||||
|
||||
## Brief
|
||||
The `tfhe-hpu-backend` holds the code to interface with the HPU accelerator of TFHE.
|
||||
It contains a `HpuDevice` abstraction that enables easy configuration and dispatching of TFHE operations on the HPU accelerator.
|
||||
|
||||
The user API exposes the following functions for hardware setup:
|
||||
- `HpuDevice::new`, `HpuDevice::from_config`: Instantiates abstraction device from configuration file.
|
||||
- `HpuDevice::init`: Configures and uploads the required public material.
|
||||
- `new_var_from`: Creates a HPU ciphertext from `tfhe-rs` ciphertext.
|
||||
|
||||
HPU device could also be used from `integer` with the help of the following function:
|
||||
- `tfhe::integer::hpu::init_device`: Init given HPU device with server key.
|
||||
- `tfhe::integer::hpu::ciphertext::HpuRadixCiphertext::from_radix_ciphertext`: Converts a CpuRadixCiphertext into its HPU counterpart.
|
||||
|
||||
HPU device could also be used seamlessly from `hl-api` by setting up a thread-local HPU server key:
|
||||
- `tfhe::Config::from_hpu_device`: Extract hl-api configuration from HpuDevice.
|
||||
- `tfhe::set_server_key`: Register the Hpu server key in the current thread.
|
||||
|
||||
HPU variables could also be created from a `high-level-api` object, with the help of the `hw-xfer` feature.
|
||||
This implements a trait that enables `clone_on`, `mv_on` `FheUint` object on the HPU accelerator, and cast back `from` them.
|
||||
|
||||
These objects implement the `std::ops` trait and could be used to dispatch operations on HPU hardware.
|
||||
|
||||
### Backend structure
|
||||
`tfhe-hpu-backend` is split in various modules:
|
||||
- `entities`: Defines structure handled by HPU accelerator. Conversion traits from/into those objects are implemented in `tfhe-rs`.
|
||||
- `asm`: Describes assembly-like language for the HPU. It enables abstract HPU behavior and easily updates it through micro-code.
|
||||
- `fw`: Abstraction to help the micro-code designer. Uses a simple rust program for describing new HPU operations. Helps with register/heap management.
|
||||
- `interface`:
|
||||
+ `device`: High-level structure that exposes the User API.
|
||||
+ `backend`: Inner private structure that contains HPU modules
|
||||
+ `variable`: Wraps HPU ciphertexts. It ties a hardware object's lifetime to the `rust` borrow-checker.
|
||||
+ `memory`: Handles on-board memory allocation and synchronization
|
||||
+ `config`: Helps to configure HPU accelerator through a TOML configuration file
|
||||
+ `cmd`: Translates operation over `variable` in concrete HPU commands
|
||||
+ `regmap`: Communicates with the HPU internal register with ease.
|
||||
+ `rtl`: Defines concrete `rust` structure populated from HPU's status/configuration registers
|
||||
|
||||
|
||||
Below is an overview of the internal structure of the Backend.
|
||||

|
||||
|
||||
This picture depicts the internal modules of `tfhe-hpu-backend`, Device is the main entry point for the user. Its lifecycle is as follows:
|
||||
|
||||
1. Create HpuDevice, open link with the associated FPGA. Configure associated drivers and upload the bitstream. Read FPGA registers to extract supported configuration and features. Build Firmware conversion table (IOp -> DOps stream).
|
||||
|
||||
2. Allocate required memory chunks in the on-board memory. Upload public material required by TFHE computation.
|
||||
|
||||
3. Create HPU variables that handle TFHE Ciphertexts. It wraps TFHE Ciphertext with required internal resources and enforces the correct lifetime management. This abstraction enforces that during the variable lifecycle all required resources are valid.
|
||||
|
||||
4. Users could trigger HPU operation from the HPU variable.
|
||||
Variable abstraction enforces that required objects are correctly synced on the hardware and converts each operation in a concrete HPU command.
|
||||
When HPU operation is acknowledged by the hardware, the internal state of the associated variable is updated.
|
||||
This mechanism enables asynchronous operation and minimal amount of Host to/from HW memory transfer.
|
||||
This mechanism also enables offloading a computation graph to the HPU and requires a synchronization only on the final results.
|
||||
|
||||
## Example
|
||||
### Configuration file
|
||||
HPU configuration knobs are gathered in a TOML configuration file. This file describes the targeted FPGA with its associated configuration:
|
||||
```toml
|
||||
[fpga] # FPGA target
|
||||
# Register layout in the FPGA
|
||||
regmap=["${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_cfg_1in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_cfg_3in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_1in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_3in3.toml"]
|
||||
polling_us=10
|
||||
[fpga.ffi.V80] # Hardware properties
|
||||
ami_dev="/dev/ami1" # Name of ami device
|
||||
qdma_h2c="/dev/qdma${V80_PCIE_DEV}001-MM-0" # QDma host to card device
|
||||
qdma_c2h="/dev/qdma${V80_PCIE_DEV}001-MM-1" # QDma card to host device
|
||||
|
||||
[rtl] # RTL option
|
||||
bpip_use = true # BPIP/IPIP mode
|
||||
bpip_use_opportunism = false # Use strict flush paradigm
|
||||
bpip_timeout = 100_000 # BPIP timeout in clock `cycles`
|
||||
|
||||
[board] # Board configuration
|
||||
ct_mem = 32768 # Number of allocated ciphertext
|
||||
ct_pc = [ # Memory used for ciphertext
|
||||
{Hbm= {pc=32}},
|
||||
{Hbm= {pc=33}},
|
||||
]
|
||||
heap_size = 16384 # Number of slots reserved for heap
|
||||
|
||||
lut_mem = 256 # Number of allocated LUT table
|
||||
lut_pc = {Hbm={pc=34}} # Memory used for LUT
|
||||
|
||||
fw_size= 16777216 # Size in byte of the Firmware translation table
|
||||
fw_pc = {Ddr= {offset= 0x3900_0000}} # Memory used for firmware translation table
|
||||
|
||||
bsk_pc = [ # Memory used for Bootstrapping key
|
||||
{Hbm={pc=8}},
|
||||
{Hbm={pc=12}},
|
||||
{Hbm={pc=24}},
|
||||
{Hbm={pc=28}},
|
||||
{Hbm={pc=40}},
|
||||
{Hbm={pc=44}},
|
||||
{Hbm={pc=56}},
|
||||
{Hbm={pc=60}}
|
||||
]
|
||||
|
||||
ksk_pc = [ # Memory used for Keyswitching key
|
||||
{Hbm={pc=0}},
|
||||
{Hbm={pc=1}},
|
||||
{Hbm={pc=2}},
|
||||
{Hbm={pc=3}},
|
||||
{Hbm={pc=4}},
|
||||
{Hbm={pc=5}},
|
||||
{Hbm={pc=6}},
|
||||
{Hbm={pc=7}},
|
||||
{Hbm={pc=16}},
|
||||
{Hbm={pc=17}},
|
||||
{Hbm={pc=18}},
|
||||
{Hbm={pc=19}},
|
||||
{Hbm={pc=20}},
|
||||
{Hbm={pc=21}},
|
||||
{Hbm={pc=22}},
|
||||
{Hbm={pc=23}}
|
||||
]
|
||||
|
||||
trace_pc = {Hbm={pc=35}} # Memory used for trace log
|
||||
trace_depth = 32 # Size of Memory in MiB allocated for trace log
|
||||
|
||||
[firmware] # Firmware properties
|
||||
implementation = "Llt" # Firmware flavor to use
|
||||
integer_w=[4,6,8,10,12,14,16,32,64,128] # List of supported IOp width
|
||||
min_batch_size = 11 # Minimum batch size for maximum throughput
|
||||
kogge_cfg = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/kogge_cfg.toml"
|
||||
custom_iop.'IOP[0]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_0.asm"
|
||||
|
||||
# Default firmware configuration. Could be edited on per-IOp basis
|
||||
[firmware.op_cfg.default]
|
||||
fill_batch_fifo = true
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
```
|
||||
|
||||
### Device setup
|
||||
Following code snippet shows how to instantiate and configure a `HpuDevice`:
|
||||
```rust
|
||||
// Following code snippets used the HighLevelApi abstraction
|
||||
// Instantiate HpuDevice --------------------------------------------------
|
||||
let hpu_device = HpuDevice::from_config(&args.config.expand());
|
||||
|
||||
// Generate keys ----------------------------------------------------------
|
||||
let config = Config::from_hpu_device(&hpu_device);
|
||||
|
||||
let cks = ClientKey::generate(config);
|
||||
let csks = CompressedServerKey::new(&cks);
|
||||
|
||||
// Register HpuDevice and key as thread-local engine
|
||||
set_server_key((hpu_device, csks));
|
||||
```
|
||||
|
||||
### Clone CPU ciphertext on HPU
|
||||
Following code snippet shows how to convert CPU ciphertext in HPU one:
|
||||
``` rust
|
||||
// Draw random value as input
|
||||
let a = rand::thread_rng().gen_range(0..u8::MAX);
|
||||
|
||||
// Encrypt them on Cpu side
|
||||
let a_fhe = FheUint8::encrypt(a, &cks);
|
||||
|
||||
// Clone a ciphertext and move them in HpuWorld
|
||||
// NB: Data doesn't move over Pcie at this stage
|
||||
// Data are only rearranged in Hpu order and copied into the host internal buffer
|
||||
let a_hpu = a_fhe.clone_on(&hpu_device);
|
||||
```
|
||||
|
||||
### Dispatch operation on HPU
|
||||
Once registered as thread-local engine, HighLevel FheUint are converted in Hpu format.
|
||||
Following code snippets show how to start operation on HPU:
|
||||
|
||||
``` rust
|
||||
// Sum -------------------------------------------------------------
|
||||
// Generate random inputs value and compute expected result
|
||||
let in_a = rng.gen_range(0..u64::max_value());
|
||||
let in_b = rng.gen_range(0..u64::max_value());
|
||||
let clear_sum_ab = in_a.wrapping_add(in_b);
|
||||
|
||||
// Encrypt input value
|
||||
let fhe_a = FheUint64::encrypt(in_a, cks);
|
||||
let fhe_b = FheUint64::encrypt(in_b, cks);
|
||||
|
||||
// Triggered operation on HPU through hl_api
|
||||
let fhe_sum_ab = fhe_a+fhe_b;
|
||||
|
||||
// Decrypt values
|
||||
let dec_sum_ab: u64 = fhe_sum_ab.decrypt(cks);
|
||||
```
|
||||
|
||||
## Pre-made Examples
|
||||
There are some example applications already available in `tfhe/examples/hpu`:
|
||||
* hpu_hlapi: Depicts the use of the HPU device through the HighLevelApi.
|
||||
* hpu_bench: Depicts the use of the HPU device through the Integer abstraction level.
|
||||
|
||||
In order to run those applications on hardware, users must build from the project root (i.e. `tfhe-rs-internal`) with the `hpu-v80` feature:
|
||||
|
||||
> NB: Running the examples requires the `.pdi` files to have been pulled correctly. Those files, due to their size, are backed by git-lfs and disabled by default.
|
||||
> In order to retrieve them, use the following command:
|
||||
> ```bash
|
||||
> git lfs pull --include="*" --exclude=""
|
||||
> ```
|
||||
|
||||
``` bash
|
||||
cargo build --release --features="hpu-v80" --example hpu_hlapi --example hpu_bench
|
||||
# Correctly setup environment with setup_hpu.sh script
|
||||
source setup_hpu.sh --config v80 --init-qdma
|
||||
./target/release/examples/hpu_bench --integer-w 64 --integer-w 32 --iop MUL --iter 10
|
||||
./target/release/examples/hpu_hlapi
|
||||
```
|
||||
|
||||
## Test framework
|
||||
There is also a set of tests baked into tfhe-rs. Tests are gathered in test bundles over various integer widths.
|
||||
Those tests have 6 sub-kinds:
|
||||
* `alu`: Run and check all ct x ct IOp
|
||||
* `alus`: Run and check all ct x scalar IOp
|
||||
* `bitwise`: Run and check all bitwise IOp
|
||||
* `cmp`: Run and check all comparison IOp
|
||||
* `ternary`: Run and check ternary operation
|
||||
* `algo`: Run and check IOp dedicated to offload small algorithms
|
||||
|
||||
|
||||
The snippet below gives some examples of commands that could be used for testing:
|
||||
``` bash
|
||||
# Correctly setup environment with setup_hpu.sh script
|
||||
source setup_hpu.sh --config v80 --init-qdma
|
||||
|
||||
# Run all sub-kind for 64b integer width
|
||||
cargo test --release --features="hpu-v80" --test hpu -- u64
|
||||
|
||||
# Run only `bitwise` sub-kind for all integer width IOp
|
||||
cargo test --release --features="hpu-v80" --test hpu -- bitwise
|
||||
```
|
||||
|
||||
## Benches framework
|
||||
HPU is completely integrated in the tfhe benchmark system. Performance results can be extracted from the HighLevelApi or the Integer Api.
|
||||
Three benchmarks could be started, through the following Makefile target for simplicity:
|
||||
``` bash
|
||||
# Do not forget to correctly set the environment beforehand
|
||||
source setup_hpu.sh --config v80 --init-qdma
|
||||
|
||||
# Run hlapi benches
|
||||
make test_high_level_api_hpu
|
||||
|
||||
# Run hlapi erc20 benches
|
||||
make bench_hlapi_erc20_hpu
|
||||
|
||||
# Run integer level benches
|
||||
make bench_integer_hpu
|
||||
```
|
||||
|
||||
## Eager to start without real Hardware ?
|
||||
Are you still waiting for your FPGA board and frustrated by the lead time?
|
||||
Don't worry, we have you covered. A dedicated simulation infrastructure with accurate performance estimation is available in tfhe-rs.
|
||||
You can use it on any Linux/macOS machine to test HPU integration within tfhe-rs and optimize your application for the HPU target.
|
||||
Simply take a look at [Hpu mockup](../../mockups/tfhe-hpu-mockup/Readme.md), and follow the instructions.
|
||||
26
backends/tfhe-hpu-backend/build.rs
Normal file
26
backends/tfhe-hpu-backend/build.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
// Build script entry point.
// When the `hw-xrt` feature is enabled it emits the linker directives for the Xilinx XRT
// libraries and compiles the C++ side of the cxx bridge; otherwise it does nothing.
fn main() {
|
||||
if cfg!(feature = "hw-xrt") {
|
||||
// Linker search path for the XRT libraries (hard-coded path).
println!("cargo:rustc-link-search=/opt/xilinx/xrt/lib");
|
||||
// Libraries linked alongside the bridge: stdc++, dl, rt, uuid and xrt_coreutil.
println!("cargo:rustc-link-lib=dylib=stdc++");
|
||||
println!("cargo:rustc-link-lib=dl");
|
||||
println!("cargo:rustc-link-lib=rt");
|
||||
println!("cargo:rustc-link-lib=uuid");
|
||||
println!("cargo:rustc-link-lib=dylib=xrt_coreutil");
|
||||
|
||||
// Build the cxx bridge declared in `src/ffi/xrt/mod.rs` together with its two C++ sources
// into the `hpu-hw-ffi` library.
cxx_build::bridge("src/ffi/xrt/mod.rs")
|
||||
.file("src/ffi/xrt/cxx/hpu_hw.cc")
|
||||
.file("src/ffi/xrt/cxx/mem_zone.cc")
|
||||
// The C++23 flag is only passed when the compiler accepts it.
.flag_if_supported("-std=c++23")
|
||||
.include("/opt/xilinx/xrt/include") // Enhance: support parsing bash env instead of hard path
|
||||
.flag("-fmessage-length=0")
|
||||
.compile("hpu-hw-ffi");
|
||||
|
||||
// Ask Cargo to re-run this script when the bridge module or any C++ source/header changes.
println!("cargo:rerun-if-changed=src/ffi/xrt/mod.rs");
|
||||
println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/hpu_hw.cc");
|
||||
println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/hpu_hw.h");
|
||||
println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/mem_zone.cc");
|
||||
println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/mem_zone.h");
|
||||
} else {
|
||||
// Simulation ffi -> nothing to do
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
# CUST_0
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Construct constant in dest slot -> 249 (0xf9)
|
||||
SUB R0 R0 R0
|
||||
ADDS R0 R0 1
|
||||
ST TD[0].0 R0
|
||||
SUB R1 R1 R1
|
||||
ADDS R1 R1 2
|
||||
ST TD[0].1 R1
|
||||
SUB R2 R2 R2
|
||||
ADDS R2 R2 3
|
||||
ST TD[0].2 R2
|
||||
SUB R3 R3 R3
|
||||
ADDS R3 R3 3
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,11 @@
|
||||
# CUST_1
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Dest <- Src_a
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
LD R2 TS[0].2
|
||||
LD R3 TS[0].3
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,25 @@
|
||||
; CUST_8
|
||||
; Simple IOp to check the ALU operation
|
||||
; Dst[0].0 <- Src[0].0 + Src[1].0
|
||||
LD R1 TS[0].0
|
||||
LD R2 TS[1].0
|
||||
ADD R0 R1 R2
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 + Src[1].1
|
||||
LD R5 TS[0].1
|
||||
LD R6 TS[1].1
|
||||
ADD R4 R5 R6
|
||||
; Store the digit-1 sum (R4) in destination digit 1, as stated by the comment of this section
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Src[0].2 + Src[1].2
|
||||
LD R9 TS[0].2
|
||||
LD R10 TS[1].2
|
||||
ADD R8 R9 R10
|
||||
ST TD[0].2 R8
|
||||
|
||||
; Dst[0].3 <- Src[0].3 + Src[1].3
|
||||
LD R13 TS[0].3
|
||||
LD R14 TS[1].3
|
||||
ADD R12 R13 R14
|
||||
; Store the digit-3 sum (R12, computed just above) rather than the digit-0 sum held in R0
ST TD[0].3 R12
|
||||
@@ -0,0 +1,6 @@
|
||||
# CUST_16
|
||||
# Simple IOp to check PBS behavior
|
||||
# Dest <- PBSNone(Src_a.0)
|
||||
LD R0 TS[0].0
|
||||
PBS_F R0 R0 PbsNone
|
||||
ST TD[0].0 R0
|
||||
@@ -0,0 +1,15 @@
|
||||
# CUST_17
|
||||
# Simple IOp to check PBS behavior
|
||||
# Dest <- PBSNone(Src_a)
|
||||
LD R0 TS[0].0
|
||||
PBS R0 R0 PbsNone
|
||||
ST TD[0].0 R0
|
||||
LD R1 TS[0].1
|
||||
PBS R1 R1 PbsNone
|
||||
ST TD[0].1 R1
|
||||
LD R2 TS[0].2
|
||||
PBS R2 R2 PbsNone
|
||||
ST TD[0].2 R2
|
||||
LD R3 TS[0].3
|
||||
PBS_F R3 R3 PbsNone
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,23 @@
|
||||
; CUST_18
|
||||
; Simple IOp to check extraction pattern
|
||||
; Correct result:
|
||||
; * Dst[0,1] <- Src[0][0,1]
|
||||
; * Dst[2,3] <- Src[1][0,1]
|
||||
|
||||
; Pack Src[0][0,1] with a Mac and extract Carry/Msg in Dst[0][0,1]
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
MAC R3 R1 R0 4
|
||||
PBS R4 R3 PbsMsgOnly
|
||||
PBS R5 R3 PbsCarryInMsg
|
||||
ST TD[0].0 R4
|
||||
ST TD[0].1 R5
|
||||
|
||||
; Pack Src[1][0,1] with a Mac and extract Carry/Msg in Dst[0][2,3]
|
||||
LD R10 TS[1].0
|
||||
LD R11 TS[1].1
|
||||
MAC R13 R11 R10 4
|
||||
PBS R14 R13 PbsMsgOnly
|
||||
PBS R15 R13 PbsCarryInMsg
|
||||
ST TD[0].2 R14
|
||||
ST TD[0].3 R15
|
||||
@@ -0,0 +1,19 @@
|
||||
; CUST_19
|
||||
; Simple IOp to check PbsMl2
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- 0
|
||||
; * Dst[0][2] <- Src[0][0] +1
|
||||
; * Dst[0][3] <- 0
|
||||
; i.e Cust_19(0x2) => 0x32
|
||||
|
||||
; Construct a 0 for destination padding
|
||||
SUB R16 R16 R16
|
||||
|
||||
; Apply PbsMl2 on Src[0] result goes in dest[0][0-3] (0-padded)
|
||||
LD R0 TS[0].0
|
||||
PBS_ML2_F R0 R0 PbsTestMany2
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R16
|
||||
ST TD[0].2 R1
|
||||
ST TD[0].3 R16
|
||||
@@ -0,0 +1,11 @@
|
||||
# CUST_2
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Dest <- Src_b
|
||||
LD R0 TS[1].0
|
||||
LD R1 TS[1].1
|
||||
LD R2 TS[1].2
|
||||
LD R3 TS[1].3
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,22 @@
|
||||
; CUST_20
|
||||
; Simple IOp to check PbsMl4
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- Src[0][0] +1
|
||||
; * Dst[0][2] <- Src[0][0] +2
|
||||
; * Dst[0][3] <- Src[0][0] +3
|
||||
; i.e Cust_20(0x0) => 0xe4
|
||||
|
||||
SUB R16 R16 R16
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R0
|
||||
ST TD[0].2 R0
|
||||
ST TD[0].3 R0
|
||||
|
||||
; Apply PbsMl4 on Src[0] result goes in dest[0][0-3]
|
||||
LD R0 TS[0].0
|
||||
PBS_ML4_F R0 R0 PbsTestMany4
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,24 @@
|
||||
; CUST_21
|
||||
; Simple IOp to check PbsMl8
|
||||
; WARN: This operation required 16b ct width
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- Src[0][0] +1
|
||||
; * Dst[0][2] <- Src[0][0] +2
|
||||
; * Dst[0][3] <- Src[0][0] +3
|
||||
; * Dst[0][4] <- Src[0][0] +4
|
||||
; * Dst[0][5] <- Src[0][0] +5
|
||||
; * Dst[0][6] <- Src[0][0] +6
|
||||
; * Dst[0][7] <- Src[0][0] +7
|
||||
|
||||
; Apply PbsMl8 on Src[0] result goes in dest[0][0-7]
|
||||
LD R0 TS[0].0
|
||||
PBS_ML8_F R0 R0 PbsTestMany8
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
ST TD[0].4 R4
|
||||
ST TD[0].5 R5
|
||||
ST TD[0].6 R6
|
||||
ST TD[0].7 R7
|
||||
@@ -0,0 +1,16 @@
|
||||
# CUST_3
|
||||
# Simple IOp to check isc behavior
|
||||
# Generate obvious deps and check that isc correctly issued the dop
|
||||
# Correct result must be Dest <- Src[0]
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
LD R2 TS[0].2
|
||||
LD R3 TS[0].3
|
||||
PBS R4 R0 PbsNone
|
||||
ST TD[0].0 R4
|
||||
PBS R4 R1 PbsNone
|
||||
ST TD[0].1 R4
|
||||
PBS R4 R2 PbsNone
|
||||
ST TD[0].2 R4
|
||||
PBS_F R4 R3 PbsNone
|
||||
ST TD[0].3 R4
|
||||
@@ -0,0 +1,19 @@
|
||||
; CUST_8
|
||||
; Simple IOp to check the ALU operation
|
||||
; Dst[0].0 <- Src[0].0 + Src[1].0
|
||||
LD R1 TS[0].0
|
||||
LD R2 TS[1].0
|
||||
ADD R0 R1 R2
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 - Src[1].1
|
||||
LD R5 TS[0].1
|
||||
LD R6 TS[1].1
|
||||
SUB R4 R5 R6
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Src[0].2 + (Src[1].2 *4)
|
||||
LD R9 TS[0].2
|
||||
LD R10 TS[1].2
|
||||
MAC R8 R9 R10 4
|
||||
ST TD[0].2 R8
|
||||
@@ -0,0 +1,21 @@
|
||||
; CUST_9
|
||||
; Simple IOp to check the ALU Scalar operation
|
||||
; Dst[0].0 <- Src[0].0 + Imm[0].0
|
||||
LD R1 TS[0].0
|
||||
ADDS R0 R1 TI[0].0
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 - Imm[0].1
|
||||
LD R5 TS[0].1
|
||||
SUBS R4 R5 TI[0].1
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Imm[0].2 - Src[0].2
|
||||
LD R9 TS[0].2
|
||||
SSUB R8 R9 TI[0].2
|
||||
ST TD[0].2 R8
|
||||
|
||||
; Dst[0].3 <- Src[0].3 * Imm[0].3
|
||||
LD R13 TS[0].3
|
||||
MULS R12 R13 TI[0].3
|
||||
ST TD[0].3 R12
|
||||
108
backends/tfhe-hpu-backend/config_store/sim/hpu_config.toml
Normal file
108
backends/tfhe-hpu-backend/config_store/sim/hpu_config.toml
Normal file
@@ -0,0 +1,108 @@
|
||||
|
||||
[fpga]
|
||||
regmap=["${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_cfg_1in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_cfg_3in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_1in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_3in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/tb_hpu_regif_dummy.toml"]
|
||||
polling_us=100000
|
||||
[fpga.ffi.Sim]
|
||||
ipc_name="/tmp/${USER}/hpu_mockup_ipc"
|
||||
|
||||
[rtl]
|
||||
bpip_use = true
|
||||
bpip_use_opportunism = true
|
||||
bpip_timeout = 100_000
|
||||
|
||||
[board]
|
||||
ct_mem = 32768
|
||||
ct_pc = [
|
||||
{Hbm= {pc=32}},
|
||||
{Hbm= {pc=33}},
|
||||
]
|
||||
heap_size = 16384
|
||||
|
||||
lut_mem = 256
|
||||
lut_pc = {Hbm={pc=34}}
|
||||
|
||||
fw_size= 16777216 # i.e. 16 MiB (16 * 1024 * 1024 bytes)
|
||||
fw_pc = {Ddr= {offset= 0x3900_0000}} # NB: Allocation must take place in the Discrete DDR
|
||||
|
||||
bsk_pc = [
|
||||
{Hbm={pc=8}},
|
||||
{Hbm={pc=12}},
|
||||
{Hbm={pc=24}},
|
||||
{Hbm={pc=28}},
|
||||
{Hbm={pc=40}},
|
||||
{Hbm={pc=44}},
|
||||
{Hbm={pc=56}},
|
||||
{Hbm={pc=60}}
|
||||
]
|
||||
|
||||
ksk_pc = [
|
||||
{Hbm={pc=0}},
|
||||
{Hbm={pc=1}},
|
||||
{Hbm={pc=2}},
|
||||
{Hbm={pc=3}},
|
||||
{Hbm={pc=4}},
|
||||
{Hbm={pc=5}},
|
||||
{Hbm={pc=6}},
|
||||
{Hbm={pc=7}},
|
||||
{Hbm={pc=16}},
|
||||
{Hbm={pc=17}},
|
||||
{Hbm={pc=18}},
|
||||
{Hbm={pc=19}},
|
||||
{Hbm={pc=20}},
|
||||
{Hbm={pc=21}},
|
||||
{Hbm={pc=22}},
|
||||
{Hbm={pc=23}}
|
||||
]
|
||||
|
||||
trace_pc = {Hbm={pc=35}}
|
||||
trace_depth = 32 # In MB
|
||||
|
||||
[firmware]
|
||||
implementation = "Llt"
|
||||
integer_w=[2,4,6,8,10,12,14,16,32,64,128]
|
||||
min_batch_size = 11
|
||||
kogge_cfg = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/kogge_cfg.toml"
|
||||
custom_iop.'IOP[0]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_0.asm"
|
||||
custom_iop.'IOP[1]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_1.asm"
|
||||
custom_iop.'IOP[2]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_2.asm"
|
||||
custom_iop.'IOP[3]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_3.asm"
|
||||
custom_iop.'IOP[8]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_8.asm"
|
||||
custom_iop.'IOP[9]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_9.asm"
|
||||
custom_iop.'IOP[16]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_16.asm"
|
||||
custom_iop.'IOP[17]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_17.asm"
|
||||
custom_iop.'IOP[18]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_18.asm"
|
||||
custom_iop.'IOP[19]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_19.asm"
|
||||
custom_iop.'IOP[20]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_20.asm"
|
||||
custom_iop.'IOP[21]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_21.asm"
|
||||
|
||||
[firmware.op_cfg.default]
|
||||
fill_batch_fifo = true
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.MUL]
|
||||
fill_batch_fifo = false
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.MULS]
|
||||
fill_batch_fifo = false
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.ERC_20]
|
||||
fill_batch_fifo = true
|
||||
min_batch_size = false
|
||||
use_tiers = true
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
@@ -0,0 +1,256 @@
|
||||
module_name="hpu_regif_core_cfg_1in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x00
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_cfg_1in3]
|
||||
description="entry_cfg_1in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x01010101}
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x11111111}
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x21212121}
|
||||
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x31313131}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.info]
|
||||
description="RTL architecture parameters"
|
||||
offset= 0x10
|
||||
|
||||
[section.info.register.version]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="VERSION"}
|
||||
|
||||
[section.info.register.ntt_architecture]
|
||||
description="NTT architecture"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="NTT_CORE_ARCH"}
|
||||
|
||||
[section.info.register.ntt_structure]
|
||||
description="NTT structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.radix = { size_b=8, offset_b=0 , default={Param="R"}, description="NTT radix"}
|
||||
field.psi = { size_b=8, offset_b=8 , default={Param="PSI"}, description="NTT psi"}
|
||||
field.div = { size_b=8, offset_b=16, default={Param="BWD_PSI_DIV"}, description="NTT backward div"}
|
||||
field.delta = { size_b=8, offset_b=24, default={Param="DELTA"}, description="NTT network delta (for wmm arch)"}
|
||||
|
||||
[section.info.register.ntt_rdx_cut]
|
||||
description="NTT radix cuts, in log2 unit (for gf64 arch)"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.radix_cut0 = { size_b=4, offset_b=0 , default={Param="NTT_RDX_CUT_S_0"}, description="NTT radix cut #0"}
|
||||
field.radix_cut1 = { size_b=4, offset_b=4 , default={Param="NTT_RDX_CUT_S_1"}, description="NTT radix cut #1"}
|
||||
field.radix_cut2 = { size_b=4, offset_b=8 , default={Param="NTT_RDX_CUT_S_2"}, description="NTT radix cut #2"}
|
||||
field.radix_cut3 = { size_b=4, offset_b=12, default={Param="NTT_RDX_CUT_S_3"}, description="NTT radix cut #3"}
|
||||
field.radix_cut4 = { size_b=4, offset_b=16, default={Param="NTT_RDX_CUT_S_4"}, description="NTT radix cut #4"}
|
||||
field.radix_cut5 = { size_b=4, offset_b=20, default={Param="NTT_RDX_CUT_S_5"}, description="NTT radix cut #5"}
|
||||
field.radix_cut6 = { size_b=4, offset_b=24, default={Param="NTT_RDX_CUT_S_6"}, description="NTT radix cut #6"}
|
||||
field.radix_cut7 = { size_b=4, offset_b=28, default={Param="NTT_RDX_CUT_S_7"}, description="NTT radix cut #7"}
|
||||
|
||||
[section.info.register.ntt_pbs]
|
||||
description="Maximum number of PBS in the NTT pipeline"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.batch_pbs_nb = { size_b=8, offset_b=0 , default={Param="BATCH_PBS_NB"}, description="Maximum number of PBS in the NTT pipe"}
|
||||
field.total_pbs_nb = { size_b=8, offset_b=8 , default={Param="TOTAL_PBS_NB"}, description="Maximum number of PBS stored in PEP buffer"}
|
||||
|
||||
[section.info.register.ntt_modulo]
|
||||
description="Code associated to the NTT prime"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="MOD_NTT_NAME"}
|
||||
|
||||
[section.info.register.application]
|
||||
description="Code associated with the application"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="APPLICATION_NAME"}
|
||||
|
||||
[section.info.register.ks_structure]
|
||||
description="Key-switch structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.x = { size_b=8, offset_b=0 , default={Param="LBX"}, description="Number of coefficients on X dimension"}
|
||||
field.y = { size_b=8, offset_b=8 , default={Param="LBY"}, description="Number of coefficients on Y dimension"}
|
||||
field.z = { size_b=8, offset_b=16, default={Param="LBZ"}, description="Number of coefficients on Z dimension"}
|
||||
|
||||
[section.info.register.ks_crypto_param]
|
||||
description="Key-switch crypto parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.mod_ksk_w = { size_b=8, offset_b=0 , default={Param="MOD_KSK_W"}, description="Width of KSK modulo"}
|
||||
field.ks_l = { size_b=8, offset_b=8 , default={Param="KS_L"}, description="Number of KS decomposition level"}
|
||||
field.ks_b = { size_b=8, offset_b=16, default={Param="KS_B_W"}, description="Width of KS decomposition base"}
|
||||
|
||||
[section.info.register.regf_structure]
|
||||
description="Register file structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.reg_nb = { size_b=8, offset_b=0 , default={Param="REGF_REG_NB"}, description="Number of registers in regfile"}
|
||||
field.coef_nb = { size_b=8, offset_b=8 , default={Param="REGF_COEF_NB"}, description="Number of coefficients at regfile interface"}
|
||||
|
||||
[section.info.register.isc_structure]
|
||||
description="Instruction scheduler structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.depth = { size_b=8, offset_b=0 , default={Param="ISC_DEPTH"}, description="Number of slots in ISC lookahead buffer."}
|
||||
field.min_iop_size = { size_b=8, offset_b=8 , default={Param="MIN_IOP_SIZE"}, description="Minimum number of DOp per IOp to prevent sync_id overflow."}
|
||||
|
||||
[section.info.register.pe_properties]
|
||||
description="Processing elements parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.alu_nb = { size_b=8, offset_b=24 , default={Param="PEA_ALU_NB"}, description="Number of coefficients processed in parallel in pe_alu"}
|
||||
field.pep_regf_period = { size_b=8, offset_b=16 , default={Param="PEP_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEP and regfile"}
|
||||
field.pem_regf_period = { size_b=8, offset_b=8 , default={Param="PEM_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEM and regfile"}
|
||||
field.pea_regf_period = { size_b=8, offset_b=0 , default={Param="PEA_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEA and regfile"}
|
||||
|
||||
[section.info.register.bsk_structure]
|
||||
description="BSK manager structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.bsk_cut_nb = { size_b=8, offset_b=8 , default={Param="BSK_CUT_NB"}, description="BSK cut nb"}
|
||||
|
||||
[section.info.register.ksk_structure]
|
||||
description="KSK manager structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.ksk_cut_nb = { size_b=8, offset_b=8 , default={Param="KSK_CUT_NB"}, description="KSK cut nb"}
|
||||
|
||||
[section.info.register.hbm_axi4_nb]
|
||||
description="Number of AXI4 connections to HBM"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.bsk_pc = { size_b=8, offset_b=0 , default={Param="BSK_PC"}, description="Number of HBM connections for BSK"}
|
||||
field.ksk_pc = { size_b=8, offset_b=8, default={Param="KSK_PC"}, description="Number of HBM connections for KSK"}
|
||||
field.pem_pc = { size_b=8, offset_b=16, default={Param="PEM_PC"}, description="Number of HBM connections for ciphertexts (PEM)"}
|
||||
field.glwe_pc = { size_b=8, offset_b=24, default={Param="GLWE_PC"}, description="Number of HBM connections for GLWE"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_pem]
|
||||
description="Ciphertext HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_PEM_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_glwe]
|
||||
description="GLWE HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_GLWE_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_bsk]
|
||||
description="BSK HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_BSK_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_ksk]
|
||||
description="KSK HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_KSK_DATA_W"}
|
||||
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.hbm_axi4_addr_1in3]
|
||||
offset= 0x1000
|
||||
description="HBM AXI4 connection address offset"
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.ct]
|
||||
description="Address offset for each ciphertext HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb","_pc1_lsb", "_pc1_msb"]
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.glwe]
|
||||
description="Address offset for each GLWE HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb"]
|
||||
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.ksk]
|
||||
description="Address offset for each KSK HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb", "_pc1_lsb", "_pc1_msb", "_pc2_lsb", "_pc2_msb", "_pc3_lsb", "_pc3_msb", "_pc4_lsb", "_pc4_msb", "_pc5_lsb", "_pc5_msb", "_pc6_lsb", "_pc6_msb", "_pc7_lsb", "_pc7_msb", "_pc8_lsb", "_pc8_msb", "_pc9_lsb", "_pc9_msb", "_pc10_lsb", "_pc10_msb", "_pc11_lsb", "_pc11_msb", "_pc12_lsb", "_pc12_msb", "_pc13_lsb", "_pc13_msb", "_pc14_lsb", "_pc14_msb", "_pc15_lsb", "_pc15_msb"]
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.trc]
|
||||
description="Address offset for each trace HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.bpip]
|
||||
offset= 0x2000
|
||||
description="BPIP configuration"
|
||||
|
||||
[section.bpip.register.use]
|
||||
description="(1) Use BPIP mode, (0) use IPIP mode (default)"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.use_bpip = { size_b=1, offset_b=0 , default={Cst=1}, description="use"}
|
||||
field.use_opportunism = { size_b=1, offset_b=1 , default={Cst=0}, description="use opportunistic PBS flush"}
|
||||
|
||||
[section.bpip.register.timeout]
|
||||
description="Timeout for BPIP mode"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
default={Cst=0xffffffff}
|
||||
@@ -0,0 +1,51 @@
|
||||
module_name="hpu_regif_core_cfg_3in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x20000
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_cfg_3in3]
|
||||
description="entry_cfg_3in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x03030303}
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x13131313}
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x23232323}
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x33333333}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.hbm_axi4_addr_3in3]
|
||||
description="HBM AXI4 connection address offset"
|
||||
offset= 0x10
|
||||
|
||||
[section.hbm_axi4_addr_3in3.register.bsk]
|
||||
description="Address offset for each BSK HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb", "_pc1_lsb", "_pc1_msb", "_pc2_lsb", "_pc2_msb", "_pc3_lsb", "_pc3_msb", "_pc4_lsb", "_pc4_msb", "_pc5_lsb", "_pc5_msb", "_pc6_lsb", "_pc6_msb", "_pc7_lsb", "_pc7_msb", "_pc8_lsb", "_pc8_msb", "_pc9_lsb", "_pc9_msb", "_pc10_lsb", "_pc10_msb", "_pc11_lsb", "_pc11_msb", "_pc12_lsb", "_pc12_msb", "_pc13_lsb", "_pc13_msb", "_pc14_lsb", "_pc14_msb", "_pc15_lsb", "_pc15_msb"]
|
||||
@@ -0,0 +1,336 @@
|
||||
module_name="hpu_regif_core_prc_1in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x10000
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_prc_1in3]
|
||||
description="entry_prc_1in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x02020202}
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x12121212}
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x22222222}
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x32323232}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.status_1in3]
|
||||
description="HPU status of part 1in3"
|
||||
offset= 0x10
|
||||
|
||||
[section.status_1in3.register.error]
|
||||
description="Error register (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.pbs = { size_b=32, offset_b=0 , default={Cst=0}, description="HPU error part 1in3"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.ksk_avail]
|
||||
description="KSK availability configuration"
|
||||
offset= 0x1000
|
||||
|
||||
[section.ksk_avail.register.avail]
|
||||
description="KSK available bit"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.avail = { size_b=1, offset_b=0 , default={Cst=0}, description="avail"}
|
||||
|
||||
[section.ksk_avail.register.reset]
|
||||
description="KSK reset sequence"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.request = { size_b=1, offset_b=0 , default={Cst=0}, description="request"}
|
||||
field.done = { size_b=1, offset_b=31 , default={Cst=0}, description="done"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.runtime_1in3]
|
||||
description="Runtime information"
|
||||
offset= 0x2000
|
||||
|
||||
[section.runtime_1in3.register.pep_cmux_loop]
|
||||
description="PEP: CMUX iteration loop number"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.br_loop = { size_b=15, offset_b=0 , default={Cst=0}, description="PBS current BR-loop"}
|
||||
field.br_loop_c = { size_b=1, offset_b=15 , default={Cst=0}, description="PBS current BR-loop parity"}
|
||||
field.ks_loop = { size_b=15, offset_b=16 , default={Cst=0}, description="KS current KS-loop"}
|
||||
field.ks_loop_c = { size_b=1, offset_b=31 , default={Cst=0}, description="KS current KS-loop parity"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_0]
|
||||
description="PEP: pointers (part 1)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.pool_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP pool_rp"}
|
||||
field.pool_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP pool_wp"}
|
||||
field.ldg_pt = { size_b=8, offset_b=16 , default={Cst=0}, description="PEP ldg_pt"}
|
||||
field.ldb_pt = { size_b=8, offset_b=24 , default={Cst=0}, description="PEP ldb_pt"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_1]
|
||||
description="PEP: pointers (part 2)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.ks_in_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP ks_in_rp"}
|
||||
field.ks_in_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP ks_in_wp"}
|
||||
field.ks_out_rp = { size_b=8, offset_b=16 , default={Cst=0}, description="PEP ks_out_rp"}
|
||||
field.ks_out_wp = { size_b=8, offset_b=24 , default={Cst=0}, description="PEP ks_out_wp"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_2]
|
||||
description="PEP: pointers (part 3)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.pbs_in_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP pbs_in_rp"}
|
||||
field.pbs_in_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP pbs_in_wp"}
|
||||
field.ipip_flush_last_pbs_in_loop = { size_b=16, offset_b=16 , default={Cst=0}, description="PEP IPIP flush last pbs_in_loop"}
|
||||
|
||||
[section.runtime_1in3.register.isc_latest_instruction]
|
||||
description="ISC: 4 latest instructions received ([0] is the most recent)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_0","_1","_2","_3"]
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_cnt]
|
||||
description="PEP: BPIP batch counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_flush_cnt]
|
||||
description="PEP: BPIP batch triggered by a flush counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_timeout_cnt]
|
||||
description="PEP: BPIP batch triggered by a timeout counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_waiting_batch_cnt]
|
||||
description="PEP: BPIP batch that waits the trigger counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_filling_cnt]
|
||||
description="PEP: Count batches filled with a given number of CT (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_1","_2","_3","_4","_5","_6","_7","_8","_9","_10","_11","_12","_13","_14","_15","_16"]
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_ld_ack_cnt]
|
||||
description="PEP: load BLWE ack counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_cmux_not_full_batch_cnt]
|
||||
description="PEP: not full batch CMUX counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_ipip_flush_cnt]
|
||||
description="PEP: IPIP flush CMUX counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldb_rcp_dur]
|
||||
description="PEP: load BLWE reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldg_req_dur]
|
||||
description="PEP: load GLWE request max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldg_rcp_dur]
|
||||
description="PEP: load GLWE reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_load_ksk_rcp_dur]
|
||||
description="PEP: load KSK slice reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_pc0","_pc1","_pc2","_pc3","_pc4","_pc5","_pc6","_pc7","_pc8","_pc9","_pc10","_pc11","_pc12","_pc13","_pc14","_pc15"]
|
||||
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_rcp_dur]
|
||||
description="PEP: MMACC SXT reception duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_req_dur]
|
||||
description="PEP: MMACC SXT request duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_cmd_wait_b_dur]
|
||||
description="PEP: MMACC SXT command wait for b duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_inst_cnt]
|
||||
description="PEP: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ack_cnt]
|
||||
description="PEP: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_inst_cnt]
|
||||
description="PEM: load input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_ack_cnt]
|
||||
description="PEM: load instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_store_inst_cnt]
|
||||
description="PEM: store input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_store_ack_cnt]
|
||||
description="PEM: store instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pea_inst_cnt]
|
||||
description="PEA: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pea_ack_cnt]
|
||||
description="PEA: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.isc_inst_cnt]
|
||||
description="ISC: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.isc_ack_cnt]
|
||||
description="ISC: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_info_0]
|
||||
description="PEM: load first data"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_pc0_0","_pc0_1","_pc0_2","_pc0_3","_pc1_0","_pc1_1","_pc1_2","_pc1_3"]
|
||||
|
||||
[section.runtime_1in3.register.pem_load_info_1]
|
||||
description="PEM: load first address"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_pc0_lsb","_pc0_msb","_pc1_lsb","_pc1_msb"]
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_0]
|
||||
description="PEM: store info 0"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.cmd_vld = { size_b=1, offset_b=0 , default={Cst=0}, description="PEM_ST cmd vld"}
|
||||
field.cmd_rdy = { size_b=1, offset_b=1 , default={Cst=0}, description="PEM_ST cmd rdy"}
|
||||
field.pem_regf_rd_req_vld = { size_b=1, offset_b=2 , default={Cst=0}, description="PEM_ST pem_regf_rd_req_vld"}
|
||||
field.pem_regf_rd_req_rdy = { size_b=1, offset_b=3 , default={Cst=0}, description="PEM_ST pem_regf_rd_req_rdy"}
|
||||
field.brsp_fifo_in_vld = { size_b=4, offset_b=4 , default={Cst=0}, description="PEM_ST brsp_fifo_in_vld"}
|
||||
field.brsp_fifo_in_rdy = { size_b=4, offset_b=8 , default={Cst=0}, description="PEM_ST brsp_fifo_in_rdy"}
|
||||
field.rcp_fifo_in_vld = { size_b=4, offset_b=12 , default={Cst=0}, description="PEM_ST rcp_fifo_in_vld"}
|
||||
field.rcp_fifo_in_rdy = { size_b=4, offset_b=16 , default={Cst=0}, description="PEM_ST rcp_fifo_in_rdy"}
|
||||
field.r2_axi_vld = { size_b=4, offset_b=20 , default={Cst=0}, description="PEM_ST r2_axi_vld"}
|
||||
field.r2_axi_rdy = { size_b=4, offset_b=24 , default={Cst=0}, description="PEM_ST r2_axi_rdy"}
|
||||
field.c0_enough_location = { size_b=4, offset_b=28 , default={Cst=0}, description="PEM_ST c0_enough_location"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_1]
|
||||
description="PEM: store info 1"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.s0_cmd_vld = { size_b=4, offset_b=0 , default={Cst=0}, description="PEM_ST s0_cmd_vld"}
|
||||
field.s0_cmd_rdy = { size_b=4, offset_b=4 , default={Cst=0}, description="PEM_ST s0_cmd_rdy"}
|
||||
field.m_axi_bvalid = { size_b=4, offset_b=8 , default={Cst=0}, description="PEM_ST m_axi_bvalid"}
|
||||
field.m_axi_bready = { size_b=4, offset_b=12 , default={Cst=0}, description="PEM_ST m_axi_bready"}
|
||||
field.m_axi_wvalid = { size_b=4, offset_b=16 , default={Cst=0}, description="PEM_ST m_axi_wvalid"}
|
||||
field.m_axi_wready = { size_b=4, offset_b=20 , default={Cst=0}, description="PEM_ST m_axi_wready"}
|
||||
field.m_axi_awvalid = { size_b=4, offset_b=24 , default={Cst=0}, description="PEM_ST m_axi_awvalid"}
|
||||
field.m_axi_awready = { size_b=4, offset_b=28 , default={Cst=0}, description="PEM_ST m_axi_awready"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_2]
|
||||
description="PEM: store info 2"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.c0_free_loc_cnt = { size_b=16, offset_b=0 , default={Cst=0}, description="PEM_ST c0_free_loc_cnt"}
|
||||
field.brsp_bresp_cnt = { size_b=16, offset_b=16 , default={Cst=0}, description="PEM_ST brsp_bresp_cnt"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_3]
|
||||
description="PEM: store info 3"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.brsp_ack_seen = { size_b=16, offset_b=0 , default={Cst=0}, description="PEM_ST brsp_ack_seen"}
|
||||
field.c0_cmd_cnt = { size_b=8, offset_b=16 , default={Cst=0}, description="PEM_ST c0_cmd_cnt"}
|
||||
@@ -0,0 +1,100 @@
|
||||
module_name="hpu_regif_core_prc_3in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x30000
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_prc_3in3]
|
||||
description="entry_prc_3in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x04040404}
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x14141414}
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x24242424}
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x34343434}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.status_3in3]
|
||||
description="HPU status of parts 2in3 and 3in3"
|
||||
offset= 0x10
|
||||
|
||||
[section.status_3in3.register.error]
|
||||
description="Error register (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.pbs = { size_b=32, offset_b=0 , default={Cst=0}, description="HPU error part 3in3"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.bsk_avail]
|
||||
description="BSK availability configuration"
|
||||
offset= 0x1000
|
||||
|
||||
[section.bsk_avail.register.avail]
|
||||
description="BSK available bit"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.avail = { size_b=1, offset_b=0 , default={Cst=0}, description="avail"}
|
||||
|
||||
[section.bsk_avail.register.reset]
|
||||
description="BSK reset sequence"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.request = { size_b=1, offset_b=0 , default={Cst=0}, description="request"}
|
||||
field.done = { size_b=1, offset_b=31 , default={Cst=0}, description="done"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.runtime_3in3]
|
||||
description="Runtime information"
|
||||
offset= 0x2000
|
||||
|
||||
[section.runtime_3in3.register.pep_load_bsk_rcp_dur]
|
||||
description="PEP: load BSK slice reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_pc0","_pc1","_pc2","_pc3","_pc4","_pc5","_pc6","_pc7","_pc8","_pc9","_pc10","_pc11","_pc12","_pc13","_pc14","_pc15"]
|
||||
|
||||
[section.runtime_3in3.register.pep_bskif_req_info_0]
|
||||
description="PEP: BSK_IF: requester info 0"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.req_br_loop_rp = { size_b=16, offset_b=0 , default={Cst=0}, description="PEP BSK_IF requester BSK read pointer"}
|
||||
field.req_br_loop_wp = { size_b=16, offset_b=16 , default={Cst=0}, description="PEP BSK_IF requester BSK write pointer"}
|
||||
|
||||
[section.runtime_3in3.register.pep_bskif_req_info_1]
|
||||
description="PEP: BSK_IF: requester info 1"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.req_prf_br_loop = { size_b=16, offset_b=0 , default={Cst=0}, description="PEP BSK_IF requester BSK prefetch pointer"}
|
||||
field.req_parity = { size_b=1, offset_b=16 , default={Cst=0}, description="PEP BSK_IF requester BSK pointer parity"}
|
||||
field.req_assigned = { size_b=1, offset_b=31 , default={Cst=0}, description="PEP BSK_IF requester assignment"}
|
||||
@@ -0,0 +1,22 @@
|
||||
module_name="tb_hpu_regif_dummy"
|
||||
description="Fake registers needed by the mockup"
|
||||
word_size_b = 32
|
||||
offset = 0x40000
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# ==============================================================================
|
||||
[section.WorkAck]
|
||||
description="Purpose of this section"
|
||||
|
||||
[section.WorkAck.register.workq]
|
||||
description="Insert work in workq and read status"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.WorkAck.register.ackq]
|
||||
description="Pop ack from ackq"
|
||||
owner="Kernel"
|
||||
read_access="ReadNotify"
|
||||
write_access="None"
|
||||
@@ -0,0 +1,6 @@
|
||||
# Fpga version
|
||||
|
||||
Built with the following command: (i.e. xrt/run_syn_hpu_msplit_3parts_64b.sh)
|
||||
```
|
||||
just zaxl-build hpu_msplit_3parts 3 "0:300" "-F TOP_MSPLIT TOP_MSPLIT_1 -F TOP_BATCH TOP_BATCH_TOPhpu_BPBS8_TPBS32 -F TOP_PCMAX TOP_PCMAX_pem2_glwe1_bsk8_ksk8 -F TOP_PC TOP_PC_pem2_glwe1_bsk4_ksk4 -F APPLICATION APPLI_msg2_carry2 -F NTT_MOD NTT_MOD_goldilocks -F NTT_CORE_ARCH NTT_CORE_ARCH_gf64 -F NTT_CORE_R_PSI NTT_CORE_R2_PSI16 -F NTT_CORE_RDX_CUT NTT_CORE_RDX_CUT_n5c5c1 -F NTT_CORE_DIV NTT_CORE_DIV_1 -F BSK_SLOT_CUT BSK_SLOT8_CUT4 -F KSK_SLOT_CUT KSK_SLOT8_CUT4 -F KSLB KSLB_x2y32z3 -F HPU_PART HPU_PART_gf64 -F AXI_DATA_W AXI_DATA_W_512" "1:${PROJECT_DIR}/hw/output/micro_code/ucore_fw.elf" 'D:MEMORY_FILE_PATH=\\\"${PROJECT_DIR}/hw/\\\"' | tee build_out.log
|
||||
```
|
||||
@@ -0,0 +1,15 @@
|
||||
# CUST_0
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Construct constant in dest slot -> 249 (0xf9)
|
||||
SUB R0 R0 R0
|
||||
ADDS R0 R0 1
|
||||
ST TD[0].0 R0
|
||||
SUB R1 R1 R1
|
||||
ADDS R1 R1 2
|
||||
ST TD[0].1 R1
|
||||
SUB R2 R2 R2
|
||||
ADDS R2 R2 3
|
||||
ST TD[0].2 R2
|
||||
SUB R3 R3 R3
|
||||
ADDS R3 R3 3
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,11 @@
|
||||
# CUST_1
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Dest <- Src_a
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
LD R2 TS[0].2
|
||||
LD R3 TS[0].3
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,25 @@
|
||||
; CUST_8
|
||||
; Simple IOp to check the ALU operation
|
||||
; Dst[0].0 <- Src[0].0 + Src[1].0
|
||||
LD R1 TS[0].0
|
||||
LD R2 TS[1].0
|
||||
ADD R0 R1 R2
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 + Src[1].1
|
||||
LD R5 TS[0].1
|
||||
LD R6 TS[1].1
|
||||
ADD R4 R5 R6
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Src[0].2 + Src[1].2
|
||||
LD R9 TS[0].2
|
||||
LD R10 TS[1].2
|
||||
ADD R8 R9 R10
|
||||
ST TD[0].2 R8
|
||||
|
||||
; Dst[0].3 <- Src[0].3 + Src[1].3
|
||||
LD R13 TS[0].3
|
||||
LD R14 TS[1].3
|
||||
ADD R12 R13 R14
|
||||
ST TD[0].3 R12
|
||||
@@ -0,0 +1,6 @@
|
||||
# CUST_16
|
||||
# Simple IOp to check PBS behavior
|
||||
# Dest <- PBSNone(Src_a.0)
|
||||
LD R0 TS[0].0
|
||||
PBS_F R0 R0 PbsNone
|
||||
ST TD[0].0 R0
|
||||
@@ -0,0 +1,15 @@
|
||||
# CUST_17
|
||||
# Simple IOp to check PBS behavior
|
||||
# Dest <- PBSNone(Src_a)
|
||||
LD R0 TS[0].0
|
||||
PBS R0 R0 PbsNone
|
||||
ST TD[0].0 R0
|
||||
LD R1 TS[0].1
|
||||
PBS R1 R1 PbsNone
|
||||
ST TD[0].1 R1
|
||||
LD R2 TS[0].2
|
||||
PBS R2 R2 PbsNone
|
||||
ST TD[0].2 R2
|
||||
LD R3 TS[0].3
|
||||
PBS_F R3 R3 PbsNone
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,23 @@
|
||||
; CUST_18
|
||||
; Simple IOp to check extraction pattern
|
||||
; Correct result:
|
||||
; * Dst[0,1] <- Src[0][0,1]
|
||||
; * Dst[2,3] <- Src[1][0,1]
|
||||
|
||||
; Pack Src[0][0,1] with a Mac and extract Carry/Msg in Dst[0][0,1]
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
MAC R3 R1 R0 4
|
||||
PBS R4 R3 PbsMsgOnly
|
||||
PBS R5 R3 PbsCarryInMsg
|
||||
ST TD[0].0 R4
|
||||
ST TD[0].1 R5
|
||||
|
||||
; Pack Src[1][0,1] with a Mac and extract Carry/Msg in Dst[0][2,3]
|
||||
LD R10 TS[1].0
|
||||
LD R11 TS[1].1
|
||||
MAC R13 R11 R10 4
|
||||
PBS R14 R13 PbsMsgOnly
|
||||
PBS R15 R13 PbsCarryInMsg
|
||||
ST TD[0].2 R14
|
||||
ST TD[0].3 R15
|
||||
@@ -0,0 +1,19 @@
|
||||
; CUST_19
|
||||
; Simple IOp to check PbsMl2
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- 0
|
||||
; * Dst[0][2] <- Src[0][0] +1
|
||||
; * Dst[0][3] <- 0
|
||||
; i.e Cust_19(0x2) => 0x32
|
||||
|
||||
; Construct a 0 for destination padding
|
||||
SUB R16 R16 R16
|
||||
|
||||
; Apply PbsMl2 on Src[0] result goes in dest[0][0-3] (0-padded)
|
||||
LD R0 TS[0].0
|
||||
PBS_ML2_F R0 R0 PbsTestMany2
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R16
|
||||
ST TD[0].2 R1
|
||||
ST TD[0].3 R16
|
||||
@@ -0,0 +1,11 @@
|
||||
# CUST_2
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Dest <- Src_b
|
||||
LD R0 TS[1].0
|
||||
LD R1 TS[1].1
|
||||
LD R2 TS[1].2
|
||||
LD R3 TS[1].3
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,22 @@
|
||||
; CUST_20
|
||||
; Simple IOp to check PbsMl4
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- Src[0][0] +1
|
||||
; * Dst[0][2] <- Src[0][0] +2
|
||||
; * Dst[0][3] <- Src[0][0] +3
|
||||
; i.e Cust_20(0x0) => 0xe4
|
||||
|
||||
SUB R16 R16 R16
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R0
|
||||
ST TD[0].2 R0
|
||||
ST TD[0].3 R0
|
||||
|
||||
; Apply PbsMl4 on Src[0] result goes in dest[0][0-3]
|
||||
LD R0 TS[0].0
|
||||
PBS_ML4_F R0 R0 PbsTestMany4
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,24 @@
|
||||
; CUST_21
|
||||
; Simple IOp to check PbsMl8
|
||||
; WARN: This operation requires 16b ct width
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- Src[0][0] +1
|
||||
; * Dst[0][2] <- Src[0][0] +2
|
||||
; * Dst[0][3] <- Src[0][0] +3
|
||||
; * Dst[0][4] <- Src[0][0] +4
|
||||
; * Dst[0][5] <- Src[0][0] +5
|
||||
; * Dst[0][6] <- Src[0][0] +6
|
||||
; * Dst[0][7] <- Src[0][0] +7
|
||||
|
||||
; Apply PbsMl8 on Src[0] result goes in dest[0][0-7]
|
||||
LD R0 TS[0].0
|
||||
PBS_ML8_F R0 R0 PbsTestMany8
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
ST TD[0].4 R4
|
||||
ST TD[0].5 R5
|
||||
ST TD[0].6 R6
|
||||
ST TD[0].7 R7
|
||||
@@ -0,0 +1,16 @@
|
||||
# CUST_3
|
||||
# Simple IOp to check isc behavior
|
||||
# Generate obvious deps and check that the isc correctly issues the DOps
|
||||
# Correct result must be Dest <- Src[0]
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
LD R2 TS[0].2
|
||||
LD R3 TS[0].3
|
||||
PBS R4 R0 PbsNone
|
||||
ST TD[0].0 R4
|
||||
PBS R4 R1 PbsNone
|
||||
ST TD[0].1 R4
|
||||
PBS R4 R2 PbsNone
|
||||
ST TD[0].2 R4
|
||||
PBS_F R4 R3 PbsNone
|
||||
ST TD[0].3 R4
|
||||
@@ -0,0 +1,19 @@
|
||||
; CUST_8
|
||||
; Simple IOp to check the ALU operation
|
||||
; Dst[0].0 <- Src[0].0 + Src[1].0
|
||||
LD R1 TS[0].0
|
||||
LD R2 TS[1].0
|
||||
ADD R0 R1 R2
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 - Src[1].1
|
||||
LD R5 TS[0].1
|
||||
LD R6 TS[1].1
|
||||
SUB R4 R5 R6
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Src[0].2 + (Src[1].2 *4)
|
||||
LD R9 TS[0].2
|
||||
LD R10 TS[1].2
|
||||
MAC R8 R9 R10 4
|
||||
ST TD[0].2 R8
|
||||
@@ -0,0 +1,21 @@
|
||||
; CUST_9
|
||||
; Simple IOp to check the ALU Scalar operation
|
||||
; Dst[0].0 <- Src[0].0 + Imm[0].0
|
||||
LD R1 TS[0].0
|
||||
ADDS R0 R1 TI[0].0
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 - Imm[0].1
|
||||
LD R5 TS[0].1
|
||||
SUBS R4 R5 TI[0].1
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Imm[0].2 - Src[0].2
|
||||
LD R9 TS[0].2
|
||||
SSUB R8 R9 TI[0].2
|
||||
ST TD[0].2 R8
|
||||
|
||||
; Dst[0].3 <- Src[0].3 * Imm[0].3
|
||||
LD R13 TS[0].3
|
||||
MULS R12 R13 TI[0].3
|
||||
ST TD[0].3 R12
|
||||
@@ -0,0 +1,98 @@
|
||||
|
||||
[fpga]
|
||||
regmap=["${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core.toml"]
|
||||
polling_us=10
|
||||
[fpga.ffi.Xrt]
|
||||
id= 0
|
||||
kernel= "hpu_msplit_3parts_1in3"
|
||||
xclbin="${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_msplit_3parts.xclbin"
|
||||
|
||||
[rtl]
|
||||
bpip_use = true
|
||||
bpip_use_opportunism = true
|
||||
bpip_timeout = 100_000
|
||||
|
||||
[board]
|
||||
ct_mem = 4096
|
||||
ct_pc = [
|
||||
{Hbm= {pc=10}},
|
||||
{Hbm= {pc=11}},
|
||||
]
|
||||
heap_size = 3584
|
||||
|
||||
lut_mem = 256
|
||||
lut_pc = {Hbm={pc=12}}
|
||||
|
||||
fw_size= 65536
|
||||
fw_pc = {Hbm={pc=1}}
|
||||
|
||||
bsk_pc = [
|
||||
{Hbm={pc=2}},
|
||||
{Hbm={pc=3}},
|
||||
{Hbm={pc=4}},
|
||||
{Hbm={pc=5}},
|
||||
{Hbm={pc=6}},
|
||||
{Hbm={pc=7}},
|
||||
{Hbm={pc=8}},
|
||||
{Hbm={pc=9}}
|
||||
]
|
||||
|
||||
ksk_pc = [
|
||||
{Hbm={pc=24}},
|
||||
{Hbm={pc=25}},
|
||||
{Hbm={pc=26}},
|
||||
{Hbm={pc=27}},
|
||||
{Hbm={pc=28}},
|
||||
{Hbm={pc=29}},
|
||||
{Hbm={pc=30}},
|
||||
{Hbm={pc=31}}
|
||||
]
|
||||
|
||||
trace_pc = {Hbm={pc=0}}
|
||||
trace_depth = 4 # In MB
|
||||
|
||||
[firmware]
|
||||
implementation = "Llt"
|
||||
integer_w=[4,6,8,10,12,14,16,32,64,128]
|
||||
min_batch_size = 6
|
||||
kogge_cfg = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/kogge_cfg.toml"
|
||||
custom_iop.'IOP[0]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_0.asm"
|
||||
custom_iop.'IOP[1]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_1.asm"
|
||||
custom_iop.'IOP[2]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_2.asm"
|
||||
custom_iop.'IOP[3]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_3.asm"
|
||||
custom_iop.'IOP[8]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_8.asm"
|
||||
custom_iop.'IOP[9]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_9.asm"
|
||||
custom_iop.'IOP[16]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_16.asm"
|
||||
custom_iop.'IOP[17]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_17.asm"
|
||||
custom_iop.'IOP[18]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_18.asm"
|
||||
custom_iop.'IOP[19]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_19.asm"
|
||||
custom_iop.'IOP[20]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_20.asm"
|
||||
custom_iop.'IOP[21]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_21.asm"
|
||||
|
||||
[firmware.op_cfg.default]
|
||||
fill_batch_fifo = true
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.MUL]
|
||||
fill_batch_fifo = false
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.MULS]
|
||||
fill_batch_fifo = false
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.ERC_20]
|
||||
fill_batch_fifo = false
|
||||
min_batch_size = true
|
||||
use_tiers = true
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:35ad67cf9760e37256a6c92cf29ea67334690b724fd3b7b859919ee9b0bde6d3
|
||||
size 78194785
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,622 @@
|
||||
# This is a sample register-map definition
|
||||
|
||||
module_name="hpu_regif_core"
|
||||
description="Hpu top-level register interface. Used by the host to retrieve RTL information, configure it and issue commands."
|
||||
word_size_b = 32
|
||||
offset = 0x00
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.Xrt]
|
||||
description="Vitis Required registers"
|
||||
offset= 0x0
|
||||
|
||||
# Currently not in use -> Placeholder only
|
||||
[section.Xrt.register.reserved]
|
||||
description="Xrt reserved"
|
||||
default={Cst=0x00}
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.info]
|
||||
description="Contain all the RTL parameters used that have impact on associated SW"
|
||||
offset= 0x10
|
||||
|
||||
[section.info.register.version]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="VERSION"}
|
||||
|
||||
[section.info.register.ntt_architecture]
|
||||
description="NTT architecture"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="NTT_CORE_ARCH"}
|
||||
|
||||
[section.info.register.ntt_structure]
|
||||
description="NTT structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.radix = { size_b=8, offset_b=0 , default={Param="R"}, description="NTT radix"}
|
||||
field.psi = { size_b=8, offset_b=8 , default={Param="PSI"}, description="NTT psi"}
|
||||
field.div = { size_b=8, offset_b=16, default={Param="BWD_PSI_DIV"}, description="NTT backward div"}
|
||||
field.delta = { size_b=8, offset_b=24, default={Param="DELTA"}, description="NTT network delta (for wmm arch)"}
|
||||
|
||||
[section.info.register.ntt_rdx_cut]
|
||||
description="NTT radix cuts, in log2 unit (for gf64 arch)"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.radix_cut0 = { size_b=4, offset_b=0 , default={Param="NTT_RDX_CUT_S_0"}, description="NTT radix cut #0"}
|
||||
field.radix_cut1 = { size_b=4, offset_b=4 , default={Param="NTT_RDX_CUT_S_1"}, description="NTT radix cut #1"}
|
||||
field.radix_cut2 = { size_b=4, offset_b=8 , default={Param="NTT_RDX_CUT_S_2"}, description="NTT radix cut #2"}
|
||||
field.radix_cut3 = { size_b=4, offset_b=12, default={Param="NTT_RDX_CUT_S_3"}, description="NTT radix cut #3"}
|
||||
field.radix_cut4 = { size_b=4, offset_b=16, default={Param="NTT_RDX_CUT_S_4"}, description="NTT radix cut #4"}
|
||||
field.radix_cut5 = { size_b=4, offset_b=20, default={Param="NTT_RDX_CUT_S_5"}, description="NTT radix cut #5"}
|
||||
field.radix_cut6 = { size_b=4, offset_b=24, default={Param="NTT_RDX_CUT_S_6"}, description="NTT radix cut #6"}
|
||||
field.radix_cut7 = { size_b=4, offset_b=28, default={Param="NTT_RDX_CUT_S_7"}, description="NTT radix cut #7"}
|
||||
|
||||
[section.info.register.ntt_pbs]
|
||||
description="Maximum number of PBS in the NTT pipeline"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.batch_pbs_nb = { size_b=8, offset_b=0 , default={Param="BATCH_PBS_NB"}, description="Maximum number of PBS in the NTT pipe"}
|
||||
field.total_pbs_nb = { size_b=8, offset_b=8 , default={Param="TOTAL_PBS_NB"}, description="Maximum number of PBS stored in PEP buffer"}
|
||||
|
||||
[section.info.register.ntt_modulo]
|
||||
description="Code associated to the NTT prime"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="MOD_NTT_NAME"}
|
||||
|
||||
[section.info.register.application]
|
||||
description="Code associated with the application"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="APPLICATION_NAME"}
|
||||
|
||||
[section.info.register.ks_structure]
|
||||
description="Key-switch structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.x = { size_b=8, offset_b=0 , default={Param="LBX"}, description="Number of coefficients on X dimension"}
|
||||
field.y = { size_b=8, offset_b=8 , default={Param="LBY"}, description="Number of coefficients on Y dimension"}
|
||||
field.z = { size_b=8, offset_b=16, default={Param="LBZ"}, description="Number of coefficients on Z dimension"}
|
||||
|
||||
[section.info.register.ks_crypto_param]
|
||||
description="Key-switch crypto parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.mod_ksk_w = { size_b=8, offset_b=0 , default={Param="MOD_KSK_W"}, description="Width of KSK modulo"}
|
||||
field.ks_l = { size_b=8, offset_b=8 , default={Param="KS_L"}, description="Number of KS decomposition level"}
|
||||
field.ks_b = { size_b=8, offset_b=16, default={Param="KS_B_W"}, description="Width of KS decomposition base"}
|
||||
|
||||
[section.info.register.regf_structure]
|
||||
description="Register file structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.reg_nb = { size_b=8, offset_b=0 , default={Param="REGF_REG_NB"}, description="Number of registers in regfile"}
|
||||
field.coef_nb = { size_b=8, offset_b=8 , default={Param="REGF_COEF_NB"}, description="Number of coefficients at regfile interface"}
|
||||
|
||||
[section.info.register.isc_structure]
|
||||
description="Instruction scheduler structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.depth = { size_b=8, offset_b=0 , default={Param="ISC_DEPTH"}, description="Number of slots in ISC lookahead buffer."}
|
||||
field.min_iop_size = { size_b=8, offset_b=8 , default={Param="MIN_IOP_SIZE"}, description="Minimum number of DOp per IOp to prevent sync_id overflow."}
|
||||
|
||||
[section.info.register.pe_properties]
|
||||
description="Processing elements parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.alu_nb = { size_b=8, offset_b=24 , default={Param="PEA_ALU_NB"}, description="Number of coefficients processed in parallel in pe_alu"}
|
||||
field.pep_regf_period = { size_b=8, offset_b=16 , default={Param="PEP_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEP and regfile"}
|
||||
field.pem_regf_period = { size_b=8, offset_b=8 , default={Param="PEM_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEM and regfile"}
|
||||
field.pea_regf_period = { size_b=8, offset_b=0 , default={Param="PEA_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEA and regfile"}
|
||||
|
||||
[section.info.register.bsk_structure]
|
||||
description="BSK manager structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.bsk_cut_nb = { size_b=8, offset_b=8 , default={Param="BSK_CUT_NB"}, description="BSK cut nb"}
|
||||
|
||||
[section.info.register.ksk_structure]
|
||||
description="KSK manager structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.ksk_cut_nb = { size_b=8, offset_b=8 , default={Param="KSK_CUT_NB"}, description="KSK cut nb"}
|
||||
|
||||
[section.info.register.hbm_axi4_nb]
|
||||
description="Number of AXI4 connections to HBM"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.bsk_pc = { size_b=8, offset_b=0 , default={Param="BSK_PC"}, description="Number of HBM connections for BSK"}
|
||||
field.ksk_pc = { size_b=8, offset_b=8, default={Param="KSK_PC"}, description="Number of HBM connections for KSK"}
|
||||
field.pem_pc = { size_b=8, offset_b=16, default={Param="PEM_PC"}, description="Number of HBM connections for ciphertexts (PEM)"}
|
||||
field.glwe_pc = { size_b=8, offset_b=24, default={Param="GLWE_PC"}, description="Number of HBM connections for GLWE"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_pem]
|
||||
description="Ciphertext HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_PEM_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_glwe]
|
||||
description="GLWE HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_GLWE_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_bsk]
|
||||
description="BSK HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_BSK_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_ksk]
|
||||
description="KSK HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_KSK_DATA_W"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.bpip]
|
||||
offset= 0x200
|
||||
description="BPIP configuration"
|
||||
|
||||
[section.bpip.register.use]
|
||||
description="(1) Use BPIP mode, (0) use IPIP mode (default)"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.use_bpip = { size_b=1, offset_b=0 , default={Cst=1}, description="use"}
|
||||
field.use_opportunism = { size_b=1, offset_b=1 , default={Cst=0}, description="use opportunistic PBS flush"}
|
||||
|
||||
[section.bpip.register.timeout]
|
||||
description="Timeout for BPIP mode"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
default={Cst=0xffffffff}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.hbm_axi4_addr_1in3]
|
||||
offset= 0x400
|
||||
description="HBM AXI4 connection address offset"
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.ct]
|
||||
description="Address offset for each ciphertext HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb","_pc1_lsb", "_pc1_msb"]
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.glwe]
|
||||
description="Address offset for each GLWE HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb"]
|
||||
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.ksk]
|
||||
description="Address offset for each KSK HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb", "_pc1_lsb", "_pc1_msb", "_pc2_lsb", "_pc2_msb", "_pc3_lsb", "_pc3_msb", "_pc4_lsb", "_pc4_msb", "_pc5_lsb", "_pc5_msb", "_pc6_lsb", "_pc6_msb", "_pc7_lsb", "_pc7_msb"]
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.trc]
|
||||
description="Address offset for each trace HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb"]
|
||||
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.hbm_axi4_addr_3in3]
|
||||
description="HBM AXI4 connection address offset"
|
||||
|
||||
[section.hbm_axi4_addr_3in3.register.bsk]
|
||||
description="Address offset for each BSK HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb", "_pc1_lsb", "_pc1_msb", "_pc2_lsb", "_pc2_msb", "_pc3_lsb", "_pc3_msb", "_pc4_lsb", "_pc4_msb", "_pc5_lsb", "_pc5_msb", "_pc6_lsb", "_pc6_msb", "_pc7_lsb", "_pc7_msb"]
|
||||
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.status_1in3]
|
||||
description="HPU status of part 1in3"
|
||||
offset= 0x800
|
||||
|
||||
[section.status_1in3.register.error]
|
||||
description="Error register (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.pbs = { size_b=32, offset_b=0 , default={Cst=0}, description="HPU error part 1in3"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.status_3in3]
|
||||
description="HPU status of parts 2in3 and 3in3"
|
||||
|
||||
[section.status_3in3.register.error]
|
||||
description="Error register (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.pbs = { size_b=32, offset_b=0 , default={Cst=0}, description="HPU error part 3in3"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.ksk_avail]
|
||||
description="KSK availability configuration"
|
||||
offset= 0x1000
|
||||
|
||||
[section.ksk_avail.register.avail]
|
||||
description="KSK available bit"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.avail = { size_b=1, offset_b=0 , default={Cst=0}, description="avail"}
|
||||
|
||||
[section.ksk_avail.register.reset]
|
||||
description="KSK reset sequence"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.request = { size_b=1, offset_b=0 , default={Cst=0}, description="request"}
|
||||
field.done = { size_b=1, offset_b=31 , default={Cst=0}, description="done"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.bsk_avail]
|
||||
description="BSK availability configuration"
|
||||
|
||||
[section.bsk_avail.register.avail]
|
||||
description="BSK available bit"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.avail = { size_b=1, offset_b=0 , default={Cst=0}, description="avail"}
|
||||
|
||||
[section.bsk_avail.register.reset]
|
||||
description="BSK reset sequence"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.request = { size_b=1, offset_b=0 , default={Cst=0}, description="request"}
|
||||
field.done = { size_b=1, offset_b=31 , default={Cst=0}, description="done"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.runtime_1in3]
|
||||
description="Runtime information"
|
||||
offset= 0x2000
|
||||
|
||||
[section.runtime_1in3.register.pep_cmux_loop]
|
||||
description="PEP: CMUX iteration loop number"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.br_loop = { size_b=15, offset_b=0 , default={Cst=0}, description="PBS current BR-loop"}
|
||||
field.br_loop_c = { size_b=1, offset_b=15 , default={Cst=0}, description="PBS current BR-loop parity"}
|
||||
field.ks_loop = { size_b=15, offset_b=16 , default={Cst=0}, description="KS current KS-loop"}
|
||||
field.ks_loop_c = { size_b=1, offset_b=31 , default={Cst=0}, description="KS current KS-loop parity"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_0]
|
||||
description="PEP: pointers (part 1)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.pool_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP pool_rp"}
|
||||
field.pool_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP pool_wp"}
|
||||
field.ldg_pt = { size_b=8, offset_b=16 , default={Cst=0}, description="PEP ldg_pt"}
|
||||
field.ldb_pt = { size_b=8, offset_b=24 , default={Cst=0}, description="PEP ldb_pt"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_1]
|
||||
description="PEP: pointers (part 2)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.ks_in_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP ks_in_rp"}
|
||||
field.ks_in_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP ks_in_wp"}
|
||||
field.ks_out_rp = { size_b=8, offset_b=16 , default={Cst=0}, description="PEP ks_out_rp"}
|
||||
field.ks_out_wp = { size_b=8, offset_b=24 , default={Cst=0}, description="PEP ks_out_wp"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_2]
|
||||
description="PEP: pointers (part 3)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.pbs_in_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP pbs_in_rp"}
|
||||
field.pbs_in_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP pbs_in_wp"}
|
||||
field.ipip_flush_last_pbs_in_loop = { size_b=16, offset_b=16 , default={Cst=0}, description="PEP IPIP flush last pbs_in_loop"}
|
||||
|
||||
[section.runtime_1in3.register.isc_latest_instruction]
|
||||
description="ISC: 4 latest instructions received ([0] is the most recent)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_0","_1","_2","_3"]
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_cnt]
|
||||
description="PEP: BPIP batch counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_flush_cnt]
|
||||
description="PEP: BPIP batch triggered by a flush counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_timeout_cnt]
|
||||
description="PEP: BPIP batch triggered by a timeout counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_waiting_batch_cnt]
|
||||
description="PEP: BPIP batch that waits the trigger counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_filling_cnt]
|
||||
description="PEP: Count of batches filled with a given number of CT (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_1","_2","_3","_4","_5","_6","_7","_8","_9","_10","_11","_12","_13","_14","_15","_16"]
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_ld_ack_cnt]
|
||||
description="PEP: load BLWE ack counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_cmux_not_full_batch_cnt]
|
||||
description="PEP: not full batch CMUX counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_ipip_flush_cnt]
|
||||
description="PEP: IPIP flush CMUX counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldb_rcp_dur]
|
||||
description="PEP: load BLWE reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldg_req_dur]
|
||||
description="PEP: load GLWE request max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldg_rcp_dur]
|
||||
description="PEP: load GLWE reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_load_ksk_rcp_dur]
|
||||
description="PEP: load KSK slice reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_pc0","_pc1","_pc2","_pc3","_pc4","_pc5","_pc6","_pc7","_pc8","_pc9","_pc10","_pc11","_pc12","_pc13","_pc14","_pc15"]
|
||||
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_rcp_dur]
|
||||
description="PEP: MMACC SXT reception duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_req_dur]
|
||||
description="PEP: MMACC SXT request duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_cmd_wait_b_dur]
|
||||
description="PEP: MMACC SXT command wait for b duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_inst_cnt]
|
||||
description="PEP: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ack_cnt]
|
||||
description="PEP: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_inst_cnt]
|
||||
description="PEM: load input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_ack_cnt]
|
||||
description="PEM: load instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_store_inst_cnt]
|
||||
description="PEM: store input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_store_ack_cnt]
|
||||
description="PEM: store instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pea_inst_cnt]
|
||||
description="PEA: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pea_ack_cnt]
|
||||
description="PEA: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.isc_inst_cnt]
|
||||
description="ISC: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.isc_ack_cnt]
|
||||
description="ISC: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_info_0]
|
||||
description="PEM: load first data"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_pc0_0","_pc0_1","_pc0_2","_pc0_3","_pc1_0","_pc1_1","_pc1_2","_pc1_3"]
|
||||
|
||||
[section.runtime_1in3.register.pem_load_info_1]
|
||||
description="PEM: load first address"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_pc0_lsb","_pc0_msb","_pc1_lsb","_pc1_msb"]
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_0]
|
||||
description="PEM: store info 0"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.cmd_vld = { size_b=1, offset_b=0 , default={Cst=0}, description="PEM_ST cmd vld"}
|
||||
field.cmd_rdy = { size_b=1, offset_b=1 , default={Cst=0}, description="PEM_ST cmd rdy"}
|
||||
field.pem_regf_rd_req_vld = { size_b=1, offset_b=2 , default={Cst=0}, description="PEM_ST pem_regf_rd_req_vld"}
|
||||
field.pem_regf_rd_req_rdy = { size_b=1, offset_b=3 , default={Cst=0}, description="PEM_ST pem_regf_rd_req_rdy"}
|
||||
field.brsp_fifo_in_vld = { size_b=4, offset_b=4 , default={Cst=0}, description="PEM_ST brsp_fifo_in_vld"}
|
||||
field.brsp_fifo_in_rdy = { size_b=4, offset_b=8 , default={Cst=0}, description="PEM_ST brsp_fifo_in_rdy"}
|
||||
field.rcp_fifo_in_vld = { size_b=4, offset_b=12 , default={Cst=0}, description="PEM_ST rcp_fifo_in_vld"}
|
||||
field.rcp_fifo_in_rdy = { size_b=4, offset_b=16 , default={Cst=0}, description="PEM_ST rcp_fifo_in_rdy"}
|
||||
field.r2_axi_vld = { size_b=4, offset_b=20 , default={Cst=0}, description="PEM_ST r2_axi_vld"}
|
||||
field.r2_axi_rdy = { size_b=4, offset_b=24 , default={Cst=0}, description="PEM_ST r2_axi_rdy"}
|
||||
field.c0_enough_location = { size_b=4, offset_b=28 , default={Cst=0}, description="PEM_ST c0_enough_location"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_1]
|
||||
description="PEM: store info 1"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.s0_cmd_vld = { size_b=4, offset_b=0 , default={Cst=0}, description="PEM_ST s0_cmd_vld"}
|
||||
field.s0_cmd_rdy = { size_b=4, offset_b=4 , default={Cst=0}, description="PEM_ST s0_cmd_rdy"}
|
||||
field.m_axi_bvalid = { size_b=4, offset_b=8 , default={Cst=0}, description="PEM_ST m_axi_bvalid"}
|
||||
field.m_axi_bready = { size_b=4, offset_b=12 , default={Cst=0}, description="PEM_ST m_axi_bready"}
|
||||
field.m_axi_wvalid = { size_b=4, offset_b=16 , default={Cst=0}, description="PEM_ST m_axi_wvalid"}
|
||||
field.m_axi_wready = { size_b=4, offset_b=20 , default={Cst=0}, description="PEM_ST m_axi_wready"}
|
||||
field.m_axi_awvalid = { size_b=4, offset_b=24 , default={Cst=0}, description="PEM_ST m_axi_awvalid"}
|
||||
field.m_axi_awready = { size_b=4, offset_b=28 , default={Cst=0}, description="PEM_ST m_axi_awready"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_2]
|
||||
description="PEM: store info 2"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.c0_free_loc_cnt = { size_b=16, offset_b=0 , default={Cst=0}, description="PEM_ST c0_free_loc_cnt"}
|
||||
field.brsp_bresp_cnt = { size_b=16, offset_b=16 , default={Cst=0}, description="PEM_ST brsp_bresp_cnt"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_3]
|
||||
description="PEM: store info 3"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.brsp_ack_seen = { size_b=16, offset_b=0 , default={Cst=0}, description="PEM_ST brsp_ack_seen"}
|
||||
field.c0_cmd_cnt = { size_b=8, offset_b=16 , default={Cst=0}, description="PEM_ST c0_cmd_cnt"}
|
||||
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.runtime_3in3]
|
||||
description="Runtime information"
|
||||
|
||||
[section.runtime_3in3.register.pep_load_bsk_rcp_dur]
|
||||
description="PEP: load BSK slice reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_pc0","_pc1","_pc2","_pc3","_pc4","_pc5","_pc6","_pc7","_pc8","_pc9","_pc10","_pc11","_pc12","_pc13","_pc14","_pc15"]
|
||||
|
||||
[section.runtime_3in3.register.pep_bskif_req_info_0]
|
||||
description="PEP: BSK_IF: requester info 0"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.req_br_loop_rp = { size_b=16, offset_b=0 , default={Cst=0}, description="PEP BSK_IF requester BSK read pointer"}
|
||||
field.req_br_loop_wp = { size_b=16, offset_b=16 , default={Cst=0}, description="PEP BSK_IF requester BSK write pointer"}
|
||||
|
||||
[section.runtime_3in3.register.pep_bskif_req_info_1]
|
||||
description="PEP: BSK_IF: requester info 1"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.req_prf_br_loop = { size_b=16, offset_b=0 , default={Cst=0}, description="PEP BSK_IF requester BSK prefetch pointer"}
|
||||
field.req_parity = { size_b=1, offset_b=16 , default={Cst=0}, description="PEP BSK_IF requester BSK pointer parity"}
|
||||
field.req_assigned = { size_b=1, offset_b=31 , default={Cst=0}, description="PEP BSK_IF requester assignment"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.WorkAck]
|
||||
description="Purpose of this section"
|
||||
offset= 0x8000
|
||||
|
||||
[section.WorkAck.register.workq]
|
||||
description="Insert work in workq and read status"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.WorkAck.register.ackq]
|
||||
description="Pop ack from ackq"
|
||||
owner="Kernel"
|
||||
read_access="ReadNotify"
|
||||
write_access="None"
|
||||
74
backends/tfhe-hpu-backend/config_store/v80/Readme.md
Normal file
74
backends/tfhe-hpu-backend/config_store/v80/Readme.md
Normal file
@@ -0,0 +1,74 @@
|
||||
NB: Versal doesn't have the pdi embedded in the configuration. Instead, the user is in charge of uploading the pdi to the FPGA flash.
|
||||
Thus, a given configuration could work with multiple pdi.
|
||||
|
||||
# Fpga version @250MHz
|
||||
This configuration is based on the following Fpga commit:
|
||||
```
|
||||
commit ad668f931eff0c281a0848d43360da0b8813539a (HEAD -> dev/hpu_v80, origin/dev/hpu_v80, origin/baroux/dev/hpu_v80, baroux/dev/hpu_v80)
|
||||
Merge: 1489024a f308f067
|
||||
Author: Baptiste Roux <baptiste.roux@zama.ai>
|
||||
Date: Fri Feb 14 19:02:53 2025 +0100
|
||||
|
||||
[MERGE] 'dev/hpu' into baroux/dev/hpu_v80
|
||||
|
||||
Retrieved CI bugfix from dev/hpu
|
||||
```
|
||||
Tagged as `aved_v1.0`
|
||||
|
||||
Built with the following command: (i.e. versal/run_syn_hpu_msplit_3parts_psi32.sh)
|
||||
```
|
||||
TOP=top_hpu_assembly
|
||||
TOP_MSPLIT=TOP_MSPLIT_1
|
||||
TOP_BATCH=TOP_BATCH_TOPhpu_BPBS12_TPBS32
|
||||
TOP_PCMAX=TOP_PCMAX_pem2_glwe1_bsk16_ksk16
|
||||
TOP_PC=TOP_PC_pem2_glwe1_bsk8_ksk16
|
||||
APPLICATION=APPLI_msg2_carry2_pfail64_132b_gaussian_1f72dba
|
||||
NTT_MOD=NTT_MOD_goldilocks
|
||||
NTT_CORE_ARCH=NTT_CORE_ARCH_gf64
|
||||
NTT_CORE_R_PSI=NTT_CORE_R2_PSI32
|
||||
NTT_CORE_RDX_CUT=NTT_CORE_RDX_CUT_n5c6
|
||||
NTT_CORE_DIV=NTT_CORE_DIV_1
|
||||
BSK_SLOT_CUT=BSK_SLOT8_CUT8
|
||||
KSK_SLOT_CUT=KSK_SLOT8_CUT16
|
||||
KSLB=KSLB_x3y64z3
|
||||
HPU_PART=HPU_PART_gf64
|
||||
AXI_DATA_W=AXI_DATA_W_256
|
||||
FPGA=FPGA_v80
|
||||
|
||||
just build $TOP new "-F TOP_MSPLIT $TOP_MSPLIT -F TOP_BATCH $TOP_BATCH -F TOP_PCMAX $TOP_PCMAX -F TOP_PC $TOP_PC -F APPLICATION $APPLICATION -F NTT_MOD $NTT_MOD -F NTT_CORE_ARCH $NTT_CORE_ARCH -F NTT_CORE_R_PSI $NTT_CORE_R_PSI -F NTT_CORE_RDX_CUT $NTT_CORE_RDX_CUT -F NTT_CORE_DIV $NTT_CORE_DIV -F BSK_SLOT_CUT $BSK_SLOT_CUT -F KSK_SLOT_CUT $KSK_SLOT_CUT -F KSLB $KSLB -F HPU_PART $HPU_PART -F AXI_DATA_W $AXI_DATA_W -F FPGA $FPGA" | tee build_out.log
|
||||
```
|
||||
|
||||
# Fpga version @350MHz
|
||||
This configuration is based on the following Fpga commit:
|
||||
```
|
||||
commit d29dbeaccf09adfe0ee13e326f4633e14726b020 (HEAD -> baroux/dev/hpu_v80_2024.2, origin/baroux/dev/hpu_v80_2024.2)
|
||||
Author: pgardratzama <pierre.gardrat@zama.ai>
|
||||
Date: Tue Feb 11 16:12:10 2025 +0100
|
||||
|
||||
adds script to synthetize HPU 1 part PSI32
|
||||
```
|
||||
Mainly the same commit as above, with flow modifications from Pierre Gardrat to support Vivado 2024.2.
|
||||
NB: Based on unofficial branch and thus not tagged
|
||||
|
||||
Built with the following command: (i.e. versal/run_syn_hpu_1part_psi32.sh)
|
||||
```
|
||||
TOP=fpga_top_hpu
|
||||
TOP_MSPLIT=TOP_MSPLIT_1
|
||||
TOP_BATCH=TOP_BATCH_TOPhpu_BPBS12_TPBS32
|
||||
TOP_PCMAX=TOP_PCMAX_pem2_glwe1_bsk16_ksk16
|
||||
TOP_PC=TOP_PC_pem2_glwe1_bsk8_ksk16
|
||||
APPLICATION=APPLI_msg2_carry2_pfail64_132b_gaussian_1f72dba
|
||||
NTT_MOD=NTT_MOD_goldilocks
|
||||
NTT_CORE_ARCH=NTT_CORE_ARCH_gf64
|
||||
NTT_CORE_R_PSI=NTT_CORE_R2_PSI32
|
||||
NTT_CORE_RDX_CUT=NTT_CORE_RDX_CUT_n5c6
|
||||
NTT_CORE_DIV=NTT_CORE_DIV_1
|
||||
BSK_SLOT_CUT=BSK_SLOT8_CUT8
|
||||
KSK_SLOT_CUT=KSK_SLOT8_CUT16
|
||||
KSLB=KSLB_x3y64z3
|
||||
HPU_PART=HPU_PART_gf64
|
||||
AXI_DATA_W=AXI_DATA_W_256
|
||||
FPGA=FPGA_v80
|
||||
|
||||
just build $TOP new "-F TOP_MSPLIT $TOP_MSPLIT -F TOP_BATCH $TOP_BATCH -F TOP_PCMAX $TOP_PCMAX -F TOP_PC $TOP_PC -F APPLICATION $APPLICATION -F NTT_MOD $NTT_MOD -F NTT_CORE_ARCH $NTT_CORE_ARCH -F NTT_CORE_R_PSI $NTT_CORE_R_PSI -F NTT_CORE_RDX_CUT $NTT_CORE_RDX_CUT -F NTT_CORE_DIV $NTT_CORE_DIV -F BSK_SLOT_CUT $BSK_SLOT_CUT -F KSK_SLOT_CUT $KSK_SLOT_CUT -F KSLB $KSLB -F HPU_PART $HPU_PART -F AXI_DATA_W $AXI_DATA_W -F FPGA $FPGA" | tee build_out.log
|
||||
```
|
||||
@@ -0,0 +1,15 @@
|
||||
# CUST_0
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Construct constant in dest slot -> 249 (0xf9)
|
||||
SUB R0 R0 R0
|
||||
ADDS R0 R0 1
|
||||
ST TD[0].0 R0
|
||||
SUB R1 R1 R1
|
||||
ADDS R1 R1 2
|
||||
ST TD[0].1 R1
|
||||
SUB R2 R2 R2
|
||||
ADDS R2 R2 3
|
||||
ST TD[0].2 R2
|
||||
SUB R3 R3 R3
|
||||
ADDS R3 R3 3
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,11 @@
|
||||
# CUST_1
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Dest <- Src_a
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
LD R2 TS[0].2
|
||||
LD R3 TS[0].3
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,25 @@
|
||||
; CUST_8
|
||||
; Simple IOp to check the ALU operation
|
||||
; Dst[0].0 <- Src[0].0 + Src[1].0
|
||||
LD R1 TS[0].0
|
||||
LD R2 TS[1].0
|
||||
ADD R0 R1 R2
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 + Src[1].1 (sum is stored in digit 1 of the destination)
|
||||
LD R5 TS[0].1
|
||||
LD R6 TS[1].1
|
||||
ADD R4 R5 R6
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Src[0].2 + Src[1].2
|
||||
LD R9 TS[0].2
|
||||
LD R10 TS[1].2
|
||||
ADD R8 R9 R10
|
||||
ST TD[0].2 R8
|
||||
|
||||
; Dst[0].3 <- Src[0].3 + Src[1].3 (store R12, the register holding this digit's sum)
|
||||
LD R13 TS[0].3
|
||||
LD R14 TS[1].3
|
||||
ADD R12 R13 R14
|
||||
ST TD[0].3 R12
|
||||
@@ -0,0 +1,6 @@
|
||||
# CUST_16
|
||||
# Simple IOp to check PBS behavior
|
||||
# Dest <- PBSNone(Src_a.0)
|
||||
LD R0 TS[0].0
|
||||
PBS_F R0 R0 PbsNone
|
||||
ST TD[0].0 R0
|
||||
@@ -0,0 +1,15 @@
|
||||
# CUST_17
|
||||
# Simple IOp to check PBS behavior
|
||||
# Dest <- PBSNone(Src_a)
|
||||
LD R0 TS[0].0
|
||||
PBS R0 R0 PbsNone
|
||||
ST TD[0].0 R0
|
||||
LD R1 TS[0].1
|
||||
PBS R1 R1 PbsNone
|
||||
ST TD[0].1 R1
|
||||
LD R2 TS[0].2
|
||||
PBS R2 R2 PbsNone
|
||||
ST TD[0].2 R2
|
||||
LD R3 TS[0].3
|
||||
PBS_F R3 R3 PbsNone
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,23 @@
|
||||
; CUST_18
|
||||
; Simple IOp to check extraction pattern
|
||||
; Correct result:
|
||||
; * Dst[0,1] <- Src[0][0,1]
|
||||
; * Dst[2,3] <- Src[1][0,1]
|
||||
|
||||
; Pack Src[0][0,1] with a Mac and extract Carry/Msg in Dst[0][0,1]
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
MAC R3 R1 R0 4
|
||||
PBS R4 R3 PbsMsgOnly
|
||||
PBS R5 R3 PbsCarryInMsg
|
||||
ST TD[0].0 R4
|
||||
ST TD[0].1 R5
|
||||
|
||||
; Pack Src[1][0,1] with a Mac and extract Carry/Msg in Dst[0][2,3]
|
||||
LD R10 TS[1].0
|
||||
LD R11 TS[1].1
|
||||
MAC R13 R11 R10 4
|
||||
PBS R14 R13 PbsMsgOnly
|
||||
PBS R15 R13 PbsCarryInMsg
|
||||
ST TD[0].2 R14
|
||||
ST TD[0].3 R15
|
||||
@@ -0,0 +1,19 @@
|
||||
; CUST_19
|
||||
; Simple IOp to check PbsMl2
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- 0
|
||||
; * Dst[0][2] <- Src[0][0] +1
|
||||
; * Dst[0][3] <- 0
|
||||
; i.e Cust_19(0x2) => 0x32
|
||||
|
||||
; Construct a 0 for destination padding
|
||||
SUB R16 R16 R16
|
||||
|
||||
; Apply PbsMl2 on Src[0] result goes in dest[0][0-3] (0-padded)
|
||||
LD R0 TS[0].0
|
||||
PBS_ML2_F R0 R0 PbsTestMany2
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R16
|
||||
ST TD[0].2 R1
|
||||
ST TD[0].3 R16
|
||||
@@ -0,0 +1,11 @@
|
||||
# CUST_2
|
||||
# Simple IOp to check the xfer between Hpu/Cpu
|
||||
# Dest <- Src_b
|
||||
LD R0 TS[1].0
|
||||
LD R1 TS[1].1
|
||||
LD R2 TS[1].2
|
||||
LD R3 TS[1].3
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,22 @@
|
||||
; CUST_20
|
||||
; Simple IOp to check PbsMl4
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- Src[0][0] +1
|
||||
; * Dst[0][2] <- Src[0][0] +2
|
||||
; * Dst[0][3] <- Src[0][0] +3
|
||||
; i.e Cust_20(0x0) => 0xe4
|
||||
|
||||
SUB R16 R16 R16
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R0
|
||||
ST TD[0].2 R0
|
||||
ST TD[0].3 R0
|
||||
|
||||
; Apply PbsMl4 on Src[0] result goes in dest[0][0-3]
|
||||
LD R0 TS[0].0
|
||||
PBS_ML4_F R0 R0 PbsTestMany4
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
@@ -0,0 +1,24 @@
|
||||
; CUST_21
|
||||
; Simple IOp to check PbsMl8
|
||||
; WARN: This operation requires 16b ct width
|
||||
; Correct result:
|
||||
; * Dst[0][0] <- Src[0][0]
|
||||
; * Dst[0][1] <- Src[0][0] +1
|
||||
; * Dst[0][2] <- Src[0][0] +2
|
||||
; * Dst[0][3] <- Src[0][0] +3
|
||||
; * Dst[0][4] <- Src[0][0] +4
|
||||
; * Dst[0][5] <- Src[0][0] +5
|
||||
; * Dst[0][6] <- Src[0][0] +6
|
||||
; * Dst[0][7] <- Src[0][0] +7
|
||||
|
||||
; Apply PbsMl8 on Src[0] result goes in dest[0][0-7]
|
||||
LD R0 TS[0].0
|
||||
PBS_ML8_F R0 R0 PbsTestMany8
|
||||
ST TD[0].0 R0
|
||||
ST TD[0].1 R1
|
||||
ST TD[0].2 R2
|
||||
ST TD[0].3 R3
|
||||
ST TD[0].4 R4
|
||||
ST TD[0].5 R5
|
||||
ST TD[0].6 R6
|
||||
ST TD[0].7 R7
|
||||
@@ -0,0 +1,16 @@
|
||||
# CUST_3
|
||||
# Simple IOp to check isc behavior
|
||||
# Generate obvious deps and check that isc correctly issued the dop
|
||||
# Correct result must be Dest <- Src[0]
|
||||
LD R0 TS[0].0
|
||||
LD R1 TS[0].1
|
||||
LD R2 TS[0].2
|
||||
LD R3 TS[0].3
|
||||
PBS R4 R0 PbsNone
|
||||
ST TD[0].0 R4
|
||||
PBS R4 R1 PbsNone
|
||||
ST TD[0].1 R4
|
||||
PBS R4 R2 PbsNone
|
||||
ST TD[0].2 R4
|
||||
PBS_F R4 R3 PbsNone
|
||||
ST TD[0].3 R4
|
||||
264
backends/tfhe-hpu-backend/config_store/v80/custom_iop/cust_4.asm
Normal file
264
backends/tfhe-hpu-backend/config_store/v80/custom_iop/cust_4.asm
Normal file
@@ -0,0 +1,264 @@
|
||||
# CUST_4
|
||||
# Just to check if this batch times out
|
||||
LD R0 TS[0].31
|
||||
LD R1 TS[1].31
|
||||
LD R3 TS[0].27
|
||||
LD R4 TS[1].27
|
||||
LD R6 TS[0].30
|
||||
LD R7 TS[1].30
|
||||
LD R9 TS[0].28
|
||||
LD R10 TS[1].28
|
||||
LD R12 TS[0].29
|
||||
LD R13 TS[1].29
|
||||
LD R15 TS[0].23
|
||||
LD R16 TS[1].23
|
||||
LD R18 TS[0].26
|
||||
LD R19 TS[1].26
|
||||
LD R21 TS[0].24
|
||||
LD R22 TS[1].24
|
||||
LD R24 TS[0].20
|
||||
LD R25 TS[1].20
|
||||
LD R27 TS[0].13
|
||||
LD R28 TS[1].13
|
||||
LD R30 TS[0].25
|
||||
LD R31 TS[1].25
|
||||
LD R33 TS[0].22
|
||||
LD R34 TS[1].22
|
||||
LD R36 TS[0].17
|
||||
LD R37 TS[1].17
|
||||
LD R39 TS[0].19
|
||||
LD R40 TS[1].19
|
||||
LD R42 TS[0].15
|
||||
LD R43 TS[1].15
|
||||
LD R45 TS[0].12
|
||||
LD R46 TS[1].12
|
||||
LD R48 TS[0].7
|
||||
LD R49 TS[1].7
|
||||
LD R51 TS[0].6
|
||||
LD R52 TS[1].6
|
||||
LD R54 TS[0].10
|
||||
LD R55 TS[1].10
|
||||
LD R57 TS[0].14
|
||||
LD R58 TS[1].14
|
||||
LD R60 TS[0].11
|
||||
LD R61 TS[1].11
|
||||
ADD R2 R0 R1
|
||||
ADD R5 R3 R4
|
||||
LD R63 TS[0].18
|
||||
LD R3 TS[1].18
|
||||
ADD R8 R6 R7
|
||||
ST TH.0 R6
|
||||
ST TH.1 R7
|
||||
ADD R11 R9 R10
|
||||
ST TH.2 R11
|
||||
LD R9 TH.2
|
||||
ADD R14 R12 R13
|
||||
ST TH.3 R12
|
||||
ST TH.4 R13
|
||||
ADD R17 R15 R16
|
||||
ST TH.5 R17
|
||||
ADD R20 R18 R19
|
||||
ST TH.6 R18
|
||||
ST TH.7 R19
|
||||
LD R15 TH.5
|
||||
ADD R23 R21 R22
|
||||
ST TH.8 R23
|
||||
LD R21 TH.8
|
||||
ADD R26 R24 R25
|
||||
ST TH.9 R24
|
||||
ST TH.10 R25
|
||||
ADD R29 R27 R28
|
||||
ST TH.11 R29
|
||||
LD R27 TH.11
|
||||
ADD R32 R30 R31
|
||||
ST TH.12 R30
|
||||
ST TH.13 R31
|
||||
ADD R35 R33 R34
|
||||
ST TH.14 R35
|
||||
ADD R38 R36 R37
|
||||
ST TH.15 R36
|
||||
ST TH.16 R37
|
||||
LD R33 TH.14
|
||||
PBS_ML2 R0 R2 PbsManyGenProp
|
||||
PBS_ML2 R6 R5 PbsManyGenProp
|
||||
PBS_ML2 R10 R9 PbsManyGenProp
|
||||
PBS_ML2 R12 R8 PbsManyGenProp
|
||||
PBS_ML2 R16 R14 PbsManyGenProp
|
||||
PBS_ML2 R18 R15 PbsManyGenProp
|
||||
PBS_ML2 R22 R21 PbsManyGenProp
|
||||
PBS_ML2 R24 R20 PbsManyGenProp
|
||||
PBS_ML2 R28 R27 PbsManyGenProp
|
||||
PBS_ML2 R30 R26 PbsManyGenProp
|
||||
PBS_ML2 R34 R32 PbsManyGenProp
|
||||
PBS_ML2_F R36 R33 PbsManyGenProp
|
||||
ADD R41 R39 R40
|
||||
LD R39 TS[0].16
|
||||
LD R40 TS[1].16
|
||||
ST TH.17 R38
|
||||
ST TH.18 R33
|
||||
LD R33 TS[0].1
|
||||
ST TH.19 R32
|
||||
LD R32 TS[1].1
|
||||
ST TH.20 R26
|
||||
ST TH.21 R27
|
||||
LD R27 TS[0].21
|
||||
ST TH.22 R20
|
||||
LD R20 TS[1].21
|
||||
ST TH.23 R21
|
||||
ST TH.24 R15
|
||||
LD R15 TS[0].0
|
||||
ST TH.25 R14
|
||||
LD R14 TS[1].0
|
||||
ST TH.26 R8
|
||||
ST TH.27 R9
|
||||
LD R9 TS[0].3
|
||||
ST TH.28 R5
|
||||
LD R5 TS[1].3
|
||||
ST TH.29 R2
|
||||
ADD R44 R42 R43
|
||||
LD R42 TS[0].2
|
||||
LD R43 TS[1].2
|
||||
ST TH.30 R41
|
||||
ADD R47 R45 R46
|
||||
LD R45 TS[0].9
|
||||
LD R46 TS[1].9
|
||||
ST TH.31 R44
|
||||
ADD R50 R48 R49
|
||||
LD R48 TS[0].5
|
||||
LD R49 TS[1].5
|
||||
ST TH.32 R47
|
||||
ADD R53 R51 R52
|
||||
LD R51 TS[0].4
|
||||
LD R52 TS[1].4
|
||||
ST TH.33 R50
|
||||
ADD R56 R54 R55
|
||||
LD R54 TS[0].8
|
||||
LD R55 TS[1].8
|
||||
ST TH.34 R53
|
||||
ADD R59 R57 R58
|
||||
ADD R62 R60 R61
|
||||
ADD R4 R63 R3
|
||||
ADD R38 R39 R40
|
||||
ADD R26 R33 R32
|
||||
ADD R21 R27 R20
|
||||
ADD R8 R15 R14
|
||||
ADD R2 R9 R5
|
||||
ADD R41 R42 R43
|
||||
ADD R44 R45 R46
|
||||
ADD R47 R48 R49
|
||||
ADD R50 R51 R52
|
||||
ADD R53 R54 R55
|
||||
MAC R57 R11 R7 2
|
||||
LD R58 TH.31
|
||||
LD R63 TH.32
|
||||
LD R3 TH.17
|
||||
ST TH.35 R41
|
||||
LD R39 TH.30
|
||||
ST TH.36 R21
|
||||
ST TH.37 R47
|
||||
ST TH.38 R53
|
||||
ST TH.39 R44
|
||||
ST TH.40 R50
|
||||
ST TH.41 R0
|
||||
LD R27 TH.35
|
||||
ST TH.42 R12
|
||||
ST TH.43 R13
|
||||
LD R9 TH.39
|
||||
ST TH.44 R16
|
||||
ST TH.45 R17
|
||||
LD R5 TH.37
|
||||
ST TH.46 R18
|
||||
ST TH.47 R19
|
||||
ST TH.48 R6
|
||||
LD R6 TH.40
|
||||
ST TH.49 R22
|
||||
ST TH.50 R23
|
||||
ST TH.51 R10
|
||||
LD R10 TH.38
|
||||
ST TH.52 R24
|
||||
ST TH.53 R25
|
||||
ST TH.54 R28
|
||||
LD R28 TH.33
|
||||
ST TH.55 R30
|
||||
ST TH.56 R31
|
||||
ST TH.57 R29
|
||||
LD R29 TH.36
|
||||
ST TH.58 R34
|
||||
ST TH.59 R35
|
||||
ST TH.60 R36
|
||||
LD R36 TH.34
|
||||
PBS_ML2 R60 R58 PbsManyGenProp
|
||||
PBS_ML2 R32 R38 PbsManyGenProp
|
||||
PBS_ML2 R14 R63 PbsManyGenProp
|
||||
PBS_ML2 R42 R8 PbsManyGenProp
|
||||
PBS_ML2 R48 R3 PbsManyGenProp
|
||||
PBS_ML2 R54 R62 PbsManyGenProp
|
||||
PBS_ML2 R40 R39 PbsManyGenProp
|
||||
PBS_ML2 R20 R4 PbsManyGenProp
|
||||
PBS_ML2 R46 R59 PbsManyGenProp
|
||||
PBS_ML2 R52 R26 PbsManyGenProp
|
||||
PBS_ML2 R44 R56 PbsManyGenProp
|
||||
PBS_ML2_F R50 R2 PbsManyGenProp
|
||||
LD R11 TH.45
|
||||
ST TH.61 R37
|
||||
ST TH.62 R2
|
||||
LD R2 TH.53
|
||||
ST TH.63 R56
|
||||
LD R56 TH.59
|
||||
ST TH.64 R26
|
||||
ST TH.65 R59
|
||||
LD R59 TH.43
|
||||
ST TH.66 R4
|
||||
MAC R37 R11 R57 4
|
||||
MAC R26 R2 R56 2
|
||||
MAC R4 R59 R11 2
|
||||
MAC R2 R4 R57 4
|
||||
MAC R59 R33 R61 2
|
||||
LD R58 TH.57
|
||||
LD R62 TH.56
|
||||
ADDS R4 R42 0
|
||||
MAC R38 R47 R58 2
|
||||
MAC R63 R49 R59 4
|
||||
MAC R8 R21 R49 2
|
||||
MULS R3 R43 2
|
||||
ADDS R3 R3 0
|
||||
MAC R39 R62 R41 2
|
||||
MAC R42 R8 R59 4
|
||||
MAC R21 R53 R3 4
|
||||
PBS_ML2 R0 R27 PbsManyGenProp
|
||||
PBS_ML2 R12 R9 PbsManyGenProp
|
||||
PBS_ML2 R16 R5 PbsManyGenProp
|
||||
PBS_ML2 R18 R6 PbsManyGenProp
|
||||
PBS_ML2 R22 R10 PbsManyGenProp
|
||||
PBS_ML2 R24 R28 PbsManyGenProp
|
||||
PBS_ML2 R30 R29 PbsManyGenProp
|
||||
PBS_ML2 R34 R36 PbsManyGenProp
|
||||
PBS R11 R2 PbsReduceCarryPad
|
||||
PBS R33 R4 PbsGenPropAdd
|
||||
PBS R47 R3 PbsReduceCarry2
|
||||
PBS_F R49 R42 PbsReduceCarryPad
|
||||
MAC R43 R1 R53 2
|
||||
ST TD[0].0 R33
|
||||
LD R29 TH.61
|
||||
MAC R8 R47 R52 4
|
||||
ADDS R27 R11 1
|
||||
MAC R9 R31 R39 4
|
||||
ADDS R5 R49 1
|
||||
MAC R6 R43 R3 4
|
||||
MAC R10 R45 R13 2
|
||||
MAC R28 R23 R25 2
|
||||
MAC R36 R29 R31 2
|
||||
MAC R2 R19 R51 2
|
||||
MAC R4 R35 R17 2
|
||||
MAC R1 R13 R28 4
|
||||
MAC R53 R10 R28 4
|
||||
MAC R47 R36 R39 4
|
||||
MAC R52 R17 R2 4
|
||||
MAC R11 R4 R2 4
|
||||
PBS R62 R21 PbsReduceCarry3
|
||||
PBS R42 R8 PbsGenPropAdd
|
||||
PBS R33 R6 PbsReduceCarryPad
|
||||
PBS R49 R53 PbsReduceCarryPad
|
||||
PBS R43 R47 PbsReduceCarryPad
|
||||
PBS_F R3 R11 PbsReduceCarryPad
|
||||
MAC R45 R62 R0 4
|
||||
@@ -0,0 +1,19 @@
|
||||
; CUST_8
|
||||
; Simple IOp to check the ALU operation
|
||||
; Dst[0].0 <- Src[0].0 + Src[1].0
|
||||
LD R1 TS[0].0
|
||||
LD R2 TS[1].0
|
||||
ADD R0 R1 R2
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 - Src[1].1
|
||||
LD R5 TS[0].1
|
||||
LD R6 TS[1].1
|
||||
SUB R4 R5 R6
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Src[0].2 + (Src[1].2 *4)
|
||||
LD R9 TS[0].2
|
||||
LD R10 TS[1].2
|
||||
MAC R8 R9 R10 4
|
||||
ST TD[0].2 R8
|
||||
@@ -0,0 +1,21 @@
|
||||
; CUST_9
|
||||
; Simple IOp to check the ALU Scalar operation
|
||||
; Dst[0].0 <- Src[0].0 + Imm[0].0
|
||||
LD R1 TS[0].0
|
||||
ADDS R0 R1 TI[0].0
|
||||
ST TD[0].0 R0
|
||||
|
||||
; Dst[0].1 <- Src[0].1 - Imm[0].1
|
||||
LD R5 TS[0].1
|
||||
SUBS R4 R5 TI[0].1
|
||||
ST TD[0].1 R4
|
||||
|
||||
; Dst[0].2 <- Imm[0].2 - Src[0].2
|
||||
LD R9 TS[0].2
|
||||
SSUB R8 R9 TI[0].2
|
||||
ST TD[0].2 R8
|
||||
|
||||
; Dst[0].3 <- Src[0].3 * Imm[0].3
|
||||
LD R13 TS[0].3
|
||||
MULS R12 R13 TI[0].3
|
||||
ST TD[0].3 R12
|
||||
112
backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml
Normal file
112
backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml
Normal file
@@ -0,0 +1,112 @@
|
||||
|
||||
[fpga]
|
||||
regmap=["${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_cfg_1in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_cfg_3in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_1in3.toml",
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_3in3.toml"]
|
||||
polling_us=10
|
||||
[fpga.ffi.V80]
|
||||
ami_id=1 # First ami device in the list
|
||||
qdma_h2c="/dev/qdma${V80_PCIE_DEV}001-MM-1"
|
||||
qdma_c2h="/dev/qdma${V80_PCIE_DEV}001-MM-2"
|
||||
|
||||
[rtl]
|
||||
bpip_use = true
|
||||
bpip_use_opportunism = true
|
||||
bpip_timeout = 100_000
|
||||
|
||||
[board]
|
||||
ct_mem = 32768
|
||||
ct_pc = [
|
||||
{Hbm= {pc=32}},
|
||||
{Hbm= {pc=33}},
|
||||
]
|
||||
heap_size = 16384
|
||||
|
||||
|
||||
lut_mem = 256
|
||||
lut_pc = {Hbm={pc=34}}
|
||||
|
||||
fw_size= 16777216 # i.e. 16 MiB
|
||||
fw_pc = {Ddr= {offset= 0x3900_0000}} # NB: Allocation must take place in the Discrete DDR
|
||||
|
||||
bsk_pc = [
|
||||
{Hbm={pc=8}},
|
||||
{Hbm={pc=12}},
|
||||
{Hbm={pc=24}},
|
||||
{Hbm={pc=28}},
|
||||
{Hbm={pc=40}},
|
||||
{Hbm={pc=44}},
|
||||
{Hbm={pc=56}},
|
||||
{Hbm={pc=60}}
|
||||
]
|
||||
|
||||
ksk_pc = [
|
||||
{Hbm={pc=0}},
|
||||
{Hbm={pc=1}},
|
||||
{Hbm={pc=2}},
|
||||
{Hbm={pc=3}},
|
||||
{Hbm={pc=4}},
|
||||
{Hbm={pc=5}},
|
||||
{Hbm={pc=6}},
|
||||
{Hbm={pc=7}},
|
||||
{Hbm={pc=16}},
|
||||
{Hbm={pc=17}},
|
||||
{Hbm={pc=18}},
|
||||
{Hbm={pc=19}},
|
||||
{Hbm={pc=20}},
|
||||
{Hbm={pc=21}},
|
||||
{Hbm={pc=22}},
|
||||
{Hbm={pc=23}}
|
||||
]
|
||||
|
||||
trace_pc = {Hbm={pc=35}}
|
||||
trace_depth = 32 # In MB
|
||||
|
||||
[firmware]
|
||||
#implementation = "Ilp"
|
||||
implementation = "Llt"
|
||||
integer_w=[2,4,6,8,10,12,14,16,32,64,128]
|
||||
min_batch_size = 11
|
||||
kogge_cfg = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/kogge_cfg.toml"
|
||||
custom_iop.'IOP[0]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_0.asm"
|
||||
custom_iop.'IOP[1]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_1.asm"
|
||||
custom_iop.'IOP[2]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_2.asm"
|
||||
custom_iop.'IOP[3]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_3.asm"
|
||||
custom_iop.'IOP[4]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_4.asm"
|
||||
custom_iop.'IOP[8]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_8.asm"
|
||||
custom_iop.'IOP[9]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_9.asm"
|
||||
custom_iop.'IOP[16]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_16.asm"
|
||||
custom_iop.'IOP[17]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_17.asm"
|
||||
custom_iop.'IOP[18]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_18.asm"
|
||||
custom_iop.'IOP[19]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_19.asm"
|
||||
custom_iop.'IOP[20]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_20.asm"
|
||||
custom_iop.'IOP[21]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_21.asm"
|
||||
|
||||
[firmware.op_cfg.default]
|
||||
fill_batch_fifo = true
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.MUL]
|
||||
fill_batch_fifo = false
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.MULS]
|
||||
fill_batch_fifo = false
|
||||
min_batch_size = false
|
||||
use_tiers = false
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
|
||||
[firmware.op_cfg.by_op.ERC_20]
|
||||
fill_batch_fifo = true
|
||||
min_batch_size = false
|
||||
use_tiers = true
|
||||
flush_behaviour = "Patient"
|
||||
flush = true
|
||||
@@ -0,0 +1,256 @@
|
||||
module_name="hpu_regif_core_cfg_1in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x00
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_cfg_1in3]
|
||||
description="entry_cfg_1in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x01010101}
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x11111111}
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x21212121}
|
||||
|
||||
|
||||
[section.entry_cfg_1in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x31313131}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.info]
|
||||
description="RTL architecture parameters"
|
||||
offset= 0x10
|
||||
|
||||
[section.info.register.version]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="VERSION"}
|
||||
|
||||
[section.info.register.ntt_architecture]
|
||||
description="NTT architecture"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="NTT_CORE_ARCH"}
|
||||
|
||||
[section.info.register.ntt_structure]
|
||||
description="NTT structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.radix = { size_b=8, offset_b=0 , default={Param="R"}, description="NTT radix"}
|
||||
field.psi = { size_b=8, offset_b=8 , default={Param="PSI"}, description="NTT psi"}
|
||||
field.div = { size_b=8, offset_b=16, default={Param="BWD_PSI_DIV"}, description="NTT backward div"}
|
||||
field.delta = { size_b=8, offset_b=24, default={Param="DELTA"}, description="NTT network delta (for wmm arch)"}
|
||||
|
||||
[section.info.register.ntt_rdx_cut]
|
||||
description="NTT radix cuts, in log2 unit (for gf64 arch)"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.radix_cut0 = { size_b=4, offset_b=0 , default={Param="NTT_RDX_CUT_S_0"}, description="NTT radix cut #0"}
|
||||
field.radix_cut1 = { size_b=4, offset_b=4 , default={Param="NTT_RDX_CUT_S_1"}, description="NTT radix cut #1"}
|
||||
field.radix_cut2 = { size_b=4, offset_b=8 , default={Param="NTT_RDX_CUT_S_2"}, description="NTT radix cut #2"}
|
||||
field.radix_cut3 = { size_b=4, offset_b=12, default={Param="NTT_RDX_CUT_S_3"}, description="NTT radix cut #3"}
|
||||
field.radix_cut4 = { size_b=4, offset_b=16, default={Param="NTT_RDX_CUT_S_4"}, description="NTT radix cut #4"}
|
||||
field.radix_cut5 = { size_b=4, offset_b=20, default={Param="NTT_RDX_CUT_S_5"}, description="NTT radix cut #5"}
|
||||
field.radix_cut6 = { size_b=4, offset_b=24, default={Param="NTT_RDX_CUT_S_6"}, description="NTT radix cut #6"}
|
||||
field.radix_cut7 = { size_b=4, offset_b=28, default={Param="NTT_RDX_CUT_S_7"}, description="NTT radix cut #7"}
|
||||
|
||||
[section.info.register.ntt_pbs]
|
||||
description="Maximum number of PBS in the NTT pipeline"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.batch_pbs_nb = { size_b=8, offset_b=0 , default={Param="BATCH_PBS_NB"}, description="Maximum number of PBS in the NTT pipe"}
|
||||
field.total_pbs_nb = { size_b=8, offset_b=8 , default={Param="TOTAL_PBS_NB"}, description="Maximum number of PBS stored in PEP buffer"}
|
||||
|
||||
[section.info.register.ntt_modulo]
|
||||
description="Code associated to the NTT prime"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="MOD_NTT_NAME"}
|
||||
|
||||
[section.info.register.application]
|
||||
description="Code associated with the application"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="APPLICATION_NAME"}
|
||||
|
||||
[section.info.register.ks_structure]
|
||||
description="Key-switch structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.x = { size_b=8, offset_b=0 , default={Param="LBX"}, description="Number of coefficients on X dimension"}
|
||||
field.y = { size_b=8, offset_b=8 , default={Param="LBY"}, description="Number of coefficients on Y dimension"}
|
||||
field.z = { size_b=8, offset_b=16, default={Param="LBZ"}, description="Number of coefficients on Z dimension"}
|
||||
|
||||
[section.info.register.ks_crypto_param]
|
||||
description="Key-switch crypto parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.mod_ksk_w = { size_b=8, offset_b=0 , default={Param="MOD_KSK_W"}, description="Width of KSK modulo"}
|
||||
field.ks_l = { size_b=8, offset_b=8 , default={Param="KS_L"}, description="Number of KS decomposition level"}
|
||||
field.ks_b = { size_b=8, offset_b=16, default={Param="KS_B_W"}, description="Width of KS decomposition base"}
|
||||
|
||||
[section.info.register.regf_structure]
|
||||
description="Register file structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.reg_nb = { size_b=8, offset_b=0 , default={Param="REGF_REG_NB"}, description="Number of registers in regfile"}
|
||||
field.coef_nb = { size_b=8, offset_b=8 , default={Param="REGF_COEF_NB"}, description="Number of coefficients at regfile interface"}
|
||||
|
||||
[section.info.register.isc_structure]
|
||||
description="Instruction scheduler structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.depth = { size_b=8, offset_b=0 , default={Param="ISC_DEPTH"}, description="Number of slots in ISC lookahead buffer."}
|
||||
field.min_iop_size = { size_b=8, offset_b=8 , default={Param="MIN_IOP_SIZE"}, description="Minimum number of DOp per IOp to prevent sync_id overflow."}
|
||||
|
||||
[section.info.register.pe_properties]
|
||||
description="Processing elements parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.alu_nb = { size_b=8, offset_b=24 , default={Param="PEA_ALU_NB"}, description="Number of coefficients processed in parallel in pe_alu"}
|
||||
field.pep_regf_period = { size_b=8, offset_b=16 , default={Param="PEP_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEP and regfile"}
|
||||
field.pem_regf_period = { size_b=8, offset_b=8 , default={Param="PEM_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEM and regfile"}
|
||||
field.pea_regf_period = { size_b=8, offset_b=0 , default={Param="PEA_REGF_PERIOD"}, description="Number of cycles between 2 consecutive data transfer between PEA and regfile"}
|
||||
|
||||
[section.info.register.bsk_structure]
|
||||
description="BSK manager structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.bsk_cut_nb = { size_b=8, offset_b=8 , default={Param="BSK_CUT_NB"}, description="BSK cut nb"}
|
||||
|
||||
[section.info.register.ksk_structure]
|
||||
description="KSK manager structure parameters"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.ksk_cut_nb = { size_b=8, offset_b=8 , default={Param="KSK_CUT_NB"}, description="KSK cut nb"}
|
||||
|
||||
[section.info.register.hbm_axi4_nb]
|
||||
description="Number of AXI4 connections to HBM"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.bsk_pc = { size_b=8, offset_b=0 , default={Param="BSK_PC"}, description="Number of HBM connections for BSK"}
|
||||
field.ksk_pc = { size_b=8, offset_b=8, default={Param="KSK_PC"}, description="Number of HBM connections for KSK"}
|
||||
field.pem_pc = { size_b=8, offset_b=16, default={Param="PEM_PC"}, description="Number of HBM connections for ciphertexts (PEM)"}
|
||||
field.glwe_pc = { size_b=8, offset_b=24, default={Param="GLWE_PC"}, description="Number of HBM connections for GLWE"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_pem]
|
||||
description="Ciphertext HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_PEM_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_glwe]
|
||||
description="GLWE HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_GLWE_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_bsk]
|
||||
description="BSK HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_BSK_DATA_W"}
|
||||
|
||||
[section.info.register.hbm_axi4_dataw_ksk]
|
||||
description="KSK HBM AXI4 connection data width"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Param="AXI4_KSK_DATA_W"}
|
||||
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.hbm_axi4_addr_1in3]
|
||||
offset= 0x1000
|
||||
description="HBM AXI4 connection address offset"
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.ct]
|
||||
description="Address offset for each ciphertext HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb","_pc1_lsb", "_pc1_msb"]
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.glwe]
|
||||
description="Address offset for each GLWE HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb"]
|
||||
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.ksk]
|
||||
description="Address offset for each KSK HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb", "_pc1_lsb", "_pc1_msb", "_pc2_lsb", "_pc2_msb", "_pc3_lsb", "_pc3_msb", "_pc4_lsb", "_pc4_msb", "_pc5_lsb", "_pc5_msb", "_pc6_lsb", "_pc6_msb", "_pc7_lsb", "_pc7_msb", "_pc8_lsb", "_pc8_msb", "_pc9_lsb", "_pc9_msb", "_pc10_lsb", "_pc10_msb", "_pc11_lsb", "_pc11_msb", "_pc12_lsb", "_pc12_msb", "_pc13_lsb", "_pc13_msb", "_pc14_lsb", "_pc14_msb", "_pc15_lsb", "_pc15_msb"]
|
||||
|
||||
[section.hbm_axi4_addr_1in3.register.trc]
|
||||
description="Address offset for each trace HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.bpip]
|
||||
offset= 0x2000
|
||||
description="BPIP configuration"
|
||||
|
||||
[section.bpip.register.use]
|
||||
description="(1) Use BPIP mode, (0) use IPIP mode (default)"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.use_bpip = { size_b=1, offset_b=0 , default={Cst=1}, description="use"}
|
||||
field.use_opportunism = { size_b=1, offset_b=1 , default={Cst=0}, description="use opportunistic PBS flush"}
|
||||
|
||||
[section.bpip.register.timeout]
|
||||
description="Timeout for BPIP mode"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
default={Cst=0xffffffff}
|
||||
@@ -0,0 +1,51 @@
|
||||
module_name="hpu_regif_core_cfg_3in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x20000
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_cfg_3in3]
|
||||
description="entry_cfg_3in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x03030303}
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x13131313}
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x23232323}
|
||||
|
||||
[section.entry_cfg_3in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x33333333}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.hbm_axi4_addr_3in3]
|
||||
description="HBM AXI4 connection address offset"
|
||||
offset= 0x10
|
||||
|
||||
[section.hbm_axi4_addr_3in3.register.bsk]
|
||||
description="Address offset for each BSK HBM AXI4 connection"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
duplicate=["_pc0_lsb", "_pc0_msb", "_pc1_lsb", "_pc1_msb", "_pc2_lsb", "_pc2_msb", "_pc3_lsb", "_pc3_msb", "_pc4_lsb", "_pc4_msb", "_pc5_lsb", "_pc5_msb", "_pc6_lsb", "_pc6_msb", "_pc7_lsb", "_pc7_msb", "_pc8_lsb", "_pc8_msb", "_pc9_lsb", "_pc9_msb", "_pc10_lsb", "_pc10_msb", "_pc11_lsb", "_pc11_msb", "_pc12_lsb", "_pc12_msb", "_pc13_lsb", "_pc13_msb", "_pc14_lsb", "_pc14_msb", "_pc15_lsb", "_pc15_msb"]
|
||||
@@ -0,0 +1,336 @@
|
||||
module_name="hpu_regif_core_prc_1in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x10000
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_prc_1in3]
|
||||
description="entry_prc_1in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x02020202}
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x12121212}
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x22222222}
|
||||
|
||||
[section.entry_prc_1in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x32323232}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.status_1in3]
|
||||
description="HPU status of part 1in3"
|
||||
offset= 0x10
|
||||
|
||||
[section.status_1in3.register.error]
|
||||
description="Error register (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.pbs = { size_b=32, offset_b=0 , default={Cst=0}, description="HPU error part 1in3"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.ksk_avail]
|
||||
description="KSK availability configuration"
|
||||
offset= 0x1000
|
||||
|
||||
[section.ksk_avail.register.avail]
|
||||
description="KSK available bit"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.avail = { size_b=1, offset_b=0 , default={Cst=0}, description="avail"}
|
||||
|
||||
[section.ksk_avail.register.reset]
|
||||
description="KSK reset sequence"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.request = { size_b=1, offset_b=0 , default={Cst=0}, description="request"}
|
||||
field.done = { size_b=1, offset_b=31 , default={Cst=0}, description="done"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.runtime_1in3]
|
||||
description="Runtime information"
|
||||
offset= 0x2000
|
||||
|
||||
[section.runtime_1in3.register.pep_cmux_loop]
|
||||
description="PEP: CMUX iteration loop number"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.br_loop = { size_b=15, offset_b=0 , default={Cst=0}, description="PBS current BR-loop"}
|
||||
field.br_loop_c = { size_b=1, offset_b=15 , default={Cst=0}, description="PBS current BR-loop parity"}
|
||||
field.ks_loop = { size_b=15, offset_b=16 , default={Cst=0}, description="KS current KS-loop"}
|
||||
field.ks_loop_c = { size_b=1, offset_b=31 , default={Cst=0}, description="KS current KS-loop parity"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_0]
|
||||
description="PEP: pointers (part 1)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.pool_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP pool_rp"}
|
||||
field.pool_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP pool_wp"}
|
||||
field.ldg_pt = { size_b=8, offset_b=16 , default={Cst=0}, description="PEP ldg_pt"}
|
||||
field.ldb_pt = { size_b=8, offset_b=24 , default={Cst=0}, description="PEP ldb_pt"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_1]
|
||||
description="PEP: pointers (part 2)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.ks_in_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP ks_in_rp"}
|
||||
field.ks_in_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP ks_in_wp"}
|
||||
field.ks_out_rp = { size_b=8, offset_b=16 , default={Cst=0}, description="PEP ks_out_rp"}
|
||||
field.ks_out_wp = { size_b=8, offset_b=24 , default={Cst=0}, description="PEP ks_out_wp"}
|
||||
|
||||
[section.runtime_1in3.register.pep_pointer_2]
|
||||
description="PEP: pointers (part 3)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.pbs_in_rp = { size_b=8, offset_b=0 , default={Cst=0}, description="PEP pbs_in_rp"}
|
||||
field.pbs_in_wp = { size_b=8, offset_b=8 , default={Cst=0}, description="PEP pbs_in_wp"}
|
||||
field.ipip_flush_last_pbs_in_loop = { size_b=16, offset_b=16 , default={Cst=0}, description="PEP IPIP flush last pbs_in_loop"}
|
||||
|
||||
[section.runtime_1in3.register.isc_latest_instruction]
|
||||
description="ISC: 4 latest instructions received ([0] is the most recent)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_0","_1","_2","_3"]
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_cnt]
|
||||
description="PEP: BPIP batch counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_flush_cnt]
|
||||
description="PEP: BPIP batch triggered by a flush counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_timeout_cnt]
|
||||
description="PEP: BPIP batch triggered by a timeout counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_waiting_batch_cnt]
|
||||
description="PEP: BPIP batch that waits the trigger counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_bpip_batch_filling_cnt]
|
||||
description="PEP: Count of batches filled with a given number of CT (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_1","_2","_3","_4","_5","_6","_7","_8","_9","_10","_11","_12","_13","_14","_15","_16"]
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_ld_ack_cnt]
|
||||
description="PEP: load BLWE ack counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_cmux_not_full_batch_cnt]
|
||||
description="PEP: not full batch CMUX counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_seq_ipip_flush_cnt]
|
||||
description="PEP: IPIP flush CMUX counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldb_rcp_dur]
|
||||
description="PEP: load BLWE reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldg_req_dur]
|
||||
description="PEP: load GLWE request max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ldg_rcp_dur]
|
||||
description="PEP: load GLWE reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_load_ksk_rcp_dur]
|
||||
description="PEP: load KSK slice reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_pc0","_pc1","_pc2","_pc3","_pc4","_pc5","_pc6","_pc7","_pc8","_pc9","_pc10","_pc11","_pc12","_pc13","_pc14","_pc15"]
|
||||
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_rcp_dur]
|
||||
description="PEP: MMACC SXT reception duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_req_dur]
|
||||
description="PEP: MMACC SXT request duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_mmacc_sxt_cmd_wait_b_dur]
|
||||
description="PEP: MMACC SXT command wait for b duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_inst_cnt]
|
||||
description="PEP: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pep_ack_cnt]
|
||||
description="PEP: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_inst_cnt]
|
||||
description="PEM: load input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_ack_cnt]
|
||||
description="PEM: load instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_store_inst_cnt]
|
||||
description="PEM: store input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_store_ack_cnt]
|
||||
description="PEM: store instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pea_inst_cnt]
|
||||
description="PEA: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pea_ack_cnt]
|
||||
description="PEA: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.isc_inst_cnt]
|
||||
description="ISC: input instruction counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.isc_ack_cnt]
|
||||
description="ISC: instruction acknowledge counter (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
|
||||
[section.runtime_1in3.register.pem_load_info_0]
|
||||
description="PEM: load first data"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_pc0_0","_pc0_1","_pc0_2","_pc0_3","_pc1_0","_pc1_1","_pc1_2","_pc1_3"]
|
||||
|
||||
[section.runtime_1in3.register.pem_load_info_1]
|
||||
description="PEM: load first address"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
duplicate=["_pc0_lsb","_pc0_msb","_pc1_lsb","_pc1_msb"]
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_0]
|
||||
description="PEM: store info 0"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.cmd_vld = { size_b=1, offset_b=0 , default={Cst=0}, description="PEM_ST cmd vld"}
|
||||
field.cmd_rdy = { size_b=1, offset_b=1 , default={Cst=0}, description="PEM_ST cmd rdy"}
|
||||
field.pem_regf_rd_req_vld = { size_b=1, offset_b=2 , default={Cst=0}, description="PEM_ST pem_regf_rd_req_vld"}
|
||||
field.pem_regf_rd_req_rdy = { size_b=1, offset_b=3 , default={Cst=0}, description="PEM_ST pem_regf_rd_req_rdy"}
|
||||
field.brsp_fifo_in_vld = { size_b=4, offset_b=4 , default={Cst=0}, description="PEM_ST brsp_fifo_in_vld"}
|
||||
field.brsp_fifo_in_rdy = { size_b=4, offset_b=8 , default={Cst=0}, description="PEM_ST brsp_fifo_in_rdy"}
|
||||
field.rcp_fifo_in_vld = { size_b=4, offset_b=12 , default={Cst=0}, description="PEM_ST rcp_fifo_in_vld"}
|
||||
field.rcp_fifo_in_rdy = { size_b=4, offset_b=16 , default={Cst=0}, description="PEM_ST rcp_fifo_in_rdy"}
|
||||
field.r2_axi_vld = { size_b=4, offset_b=20 , default={Cst=0}, description="PEM_ST r2_axi_vld"}
|
||||
field.r2_axi_rdy = { size_b=4, offset_b=24 , default={Cst=0}, description="PEM_ST r2_axi_rdy"}
|
||||
field.c0_enough_location = { size_b=4, offset_b=28 , default={Cst=0}, description="PEM_ST c0_enough_location"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_1]
|
||||
description="PEM: store info 1"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.s0_cmd_vld = { size_b=4, offset_b=0 , default={Cst=0}, description="PEM_ST s0_cmd_vld"}
|
||||
field.s0_cmd_rdy = { size_b=4, offset_b=4 , default={Cst=0}, description="PEM_ST s0_cmd_rdy"}
|
||||
field.m_axi_bvalid = { size_b=4, offset_b=8 , default={Cst=0}, description="PEM_ST m_axi_bvalid"}
|
||||
field.m_axi_bready = { size_b=4, offset_b=12 , default={Cst=0}, description="PEM_ST m_axi_bready"}
|
||||
field.m_axi_wvalid = { size_b=4, offset_b=16 , default={Cst=0}, description="PEM_ST m_axi_wvalid"}
|
||||
field.m_axi_wready = { size_b=4, offset_b=20 , default={Cst=0}, description="PEM_ST m_axi_wready"}
|
||||
field.m_axi_awvalid = { size_b=4, offset_b=24 , default={Cst=0}, description="PEM_ST m_axi_awvalid"}
|
||||
field.m_axi_awready = { size_b=4, offset_b=28 , default={Cst=0}, description="PEM_ST m_axi_awready"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_2]
|
||||
description="PEM: store info 2"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.c0_free_loc_cnt = { size_b=16, offset_b=0 , default={Cst=0}, description="PEM_ST c0_free_loc_cnt"}
|
||||
field.brsp_bresp_cnt = { size_b=16, offset_b=16 , default={Cst=0}, description="PEM_ST brsp_bresp_cnt"}
|
||||
|
||||
[section.runtime_1in3.register.pem_store_info_3]
|
||||
description="PEM: store info 3"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.brsp_ack_seen = { size_b=16, offset_b=0 , default={Cst=0}, description="PEM_ST brsp_ack_seen"}
|
||||
field.c0_cmd_cnt = { size_b=8, offset_b=16 , default={Cst=0}, description="PEM_ST c0_cmd_cnt"}
|
||||
@@ -0,0 +1,100 @@
|
||||
module_name="hpu_regif_core_prc_3in3"
|
||||
description="HPU top-level register interface. Used by the host to retrieve design information, and to configure it."
|
||||
word_size_b = 32
|
||||
offset = 0x30000
|
||||
range = 0x10000
|
||||
ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.entry_prc_3in3]
|
||||
description="entry_prc_3in3 section with known value used for debug."
|
||||
offset= 0x0
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val0]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x04040404}
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val1]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x14141414}
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val2]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x24242424}
|
||||
|
||||
[section.entry_prc_3in3.register.dummy_val3]
|
||||
description="RTL version"
|
||||
owner="Parameter"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
default={Cst=0x34343434}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.status_3in3]
|
||||
description="HPU status of parts 2in3 and 3in3"
|
||||
offset= 0x10
|
||||
|
||||
[section.status_3in3.register.error]
|
||||
description="Error register (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.pbs = { size_b=32, offset_b=0 , default={Cst=0}, description="HPU error part 3in3"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.bsk_avail]
|
||||
description="BSK availability configuration"
|
||||
offset= 0x1000
|
||||
|
||||
[section.bsk_avail.register.avail]
|
||||
description="BSK available bit"
|
||||
owner="User"
|
||||
read_access="Read"
|
||||
write_access="Write"
|
||||
field.avail = { size_b=1, offset_b=0 , default={Cst=0}, description="avail"}
|
||||
|
||||
[section.bsk_avail.register.reset]
|
||||
description="BSK reset sequence"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
field.request = { size_b=1, offset_b=0 , default={Cst=0}, description="request"}
|
||||
field.done = { size_b=1, offset_b=31 , default={Cst=0}, description="done"}
|
||||
|
||||
# =====================================================================================================================
|
||||
[section.runtime_3in3]
|
||||
description="Runtime information"
|
||||
offset= 0x2000
|
||||
|
||||
[section.runtime_3in3.register.pep_load_bsk_rcp_dur]
|
||||
description="PEP: load BSK slice reception max duration (Could be reset by user)"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="WriteNotify"
|
||||
duplicate=["_pc0","_pc1","_pc2","_pc3","_pc4","_pc5","_pc6","_pc7","_pc8","_pc9","_pc10","_pc11","_pc12","_pc13","_pc14","_pc15"]
|
||||
|
||||
[section.runtime_3in3.register.pep_bskif_req_info_0]
|
||||
description="PEP: BSK_IF: requester info 0"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.req_br_loop_rp = { size_b=16, offset_b=0 , default={Cst=0}, description="PEP BSK_IF requester BSK read pointer"}
|
||||
field.req_br_loop_wp = { size_b=16, offset_b=16 , default={Cst=0}, description="PEP BSK_IF requester BSK write pointer"}
|
||||
|
||||
[section.runtime_3in3.register.pep_bskif_req_info_1]
|
||||
description="PEP: BSK_IF: requester info 1"
|
||||
owner="Kernel"
|
||||
read_access="Read"
|
||||
write_access="None"
|
||||
field.req_prf_br_loop = { size_b=16, offset_b=0 , default={Cst=0}, description="PEP BSK_IF requester BSK prefetch pointer"}
|
||||
field.req_parity = { size_b=1, offset_b=16 , default={Cst=0}, description="PEP BSK_IF requester BSK pointer parity"}
|
||||
field.req_assigned = { size_b=1, offset_b=31 , default={Cst=0}, description="PEP BSK_IF requester assignment"}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 269 KiB |
12
backends/tfhe-hpu-backend/python/README
Normal file
12
backends/tfhe-hpu-backend/python/README
Normal file
@@ -0,0 +1,12 @@
|
||||
This contains a small library to read trace files retrieved from the hardware or the mockup.
|
||||
|
||||
To run, please add the lib directory to your PYTHONPATH:
|
||||
|
||||
export PYTHONPATH=$(readlink -m ./lib)
|
||||
|
||||
Make sure you start from a fresh python virtual environment and install the requirements in
|
||||
requirements.txt:
|
||||
|
||||
python -m venv new_env
|
||||
source new_env/bin/activate
|
||||
pip3 install -r requirements.txt
|
||||
28
backends/tfhe-hpu-backend/python/bin/demo.py
Executable file
28
backends/tfhe-hpu-backend/python/bin/demo.py
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from pandas import DataFrame
|
||||
from isctrace.analysis import Refilled, Retired, Trace
|
||||
|
||||
# Clock frequency in MHz handed to the latency/runtime helpers below.
freq_mhz = 300

# Loaded once, at import time, from the bundled hardware trace file.
iops = Trace.from_hw("data/trace.json")


def analyze_iop(iop):
    """Print the retired-instruction table, the PBS batch latency table and
    the runtime computed from the events of `iop`."""
    stats = Retired(iop)

    # One row per retired instruction.
    retired_df = stats.to_df()
    print(retired_df.to_string())

    # Batch latency table, without its 'data' column.
    full_table = stats.pbs_latency_table(freq_mhz=freq_mhz)
    latency_table = full_table.drop(columns='data')
    print(latency_table)

    # Number of batches and overall runtime.
    runtime = stats.runtime_us(freq_mhz=freq_mhz)
    print(f"batches: {latency_table['count'].sum()}")
    print(f"Runtime: {runtime}us")


if __name__ == "__main__":
    analyze_iop(iops[0])
|
||||
|
||||
# vim: fdm=marker
|
||||
7628
backends/tfhe-hpu-backend/python/data/trace.json
Normal file
7628
backends/tfhe-hpu-backend/python/data/trace.json
Normal file
File diff suppressed because it is too large
Load Diff
3
backends/tfhe-hpu-backend/python/lib/example.json
Normal file
3
backends/tfhe-hpu-backend/python/lib/example.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c3701d5e7d53eef6478a1b03a2c8e32cf5d20c1eb6829e754fe1ced4a0a16bed
|
||||
size 693363
|
||||
@@ -0,0 +1,4 @@
|
||||
from . import hw
|
||||
from . import fmt
|
||||
from . import analysis
|
||||
from . import mockup
|
||||
300
backends/tfhe-hpu-backend/python/lib/isctrace/analysis.py
Normal file
300
backends/tfhe-hpu-backend/python/lib/isctrace/analysis.py
Normal file
@@ -0,0 +1,300 @@
|
||||
# An abstraction layer that can be used to analyze both mockup and hardware
|
||||
# traces
|
||||
|
||||
import sys
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from itertools import tee, chain, starmap
|
||||
from operator import attrgetter, sub
|
||||
from typing import Iterable, Iterator
|
||||
|
||||
import numpy as np
|
||||
from pandas import DataFrame
|
||||
|
||||
def delta(a: Iterable[float]):
    """Return an iterator over the differences between consecutive values.

    The first value is compared against 0, so the output has as many items
    as the input: ``delta([1, 3, 6])`` yields ``1, 2, 3``.
    """
    current, previous = tee(a, 2)
    # Shift the second copy by one position, padding the front with 0.
    shifted = chain([0], previous)
    return (cur - prev for cur, prev in zip(current, shifted))
|
||||
|
||||
def group_by_time(it, timef, threshold):
    """Split the iterator `it` into lists of items that are close in time.

    `timef(item)` gives the time of an item. An item joins the current group
    when its time is less than `threshold` after the previous item's time;
    otherwise the current group is yielded and a new one is started.
    Nothing is yielded for an empty iterator.

    NOTE(review): the reference time is assumed to advance on every item
    (gap between consecutive items) - confirm against the upstream file,
    whose indentation is not recoverable here.
    """
    try:
        head = next(it)
        group = [head]
        last_time = timef(head)
        for item in it:
            now = timef(item)
            gap = now - last_time
            if gap < threshold:
                group.append(item)
            else:
                yield group
                group = [item]
            last_time = now
        if group:
            yield group
    except StopIteration:
        # Raised by next() on an empty iterator: there is nothing to group.
        return
|
||||
|
||||
class BaseEvent:
    """Base class of the event payloads; serializes to its class name."""

    def as_dict(self):
        """Return ``{'event': <name of the concrete class>}``."""
        return dict(event=type(self).__name__)
|
||||
|
||||
class InsnEvent(BaseEvent):
    """Base class of the events that carry an instruction in `self.insn`.

    Derives from BaseEvent (whose `as_dict` it already relied on) so that
    instruction events are BaseEvent instances like every other event.
    Subclasses are expected to set `self.insn`.
    """

    def as_dict(self):
        """Return the BaseEvent dict extended with ``'insn'``: the string
        form of the carried instruction."""
        ret = super().as_dict()
        ret['insn'] = str(self.insn)
        return ret
|
||||
|
||||
class Refill(InsnEvent):
    """Instruction event of kind 'Refill' for the instruction `insn`."""

    def __init__(self, insn):
        # Instruction this event refers to; read back by InsnEvent.as_dict().
        self.insn = insn
|
||||
|
||||
class Issue(InsnEvent):
    """Instruction event of kind 'Issue' for the instruction `insn`."""

    def __init__(self, insn):
        # Instruction this event refers to; read back by InsnEvent.as_dict().
        self.insn = insn
|
||||
|
||||
class Retire(InsnEvent):
    """Instruction event of kind 'Retire' for the instruction `insn`."""

    def __init__(self, insn):
        # Instruction this event refers to; read back by InsnEvent.as_dict().
        self.insn = insn
|
||||
|
||||
class RdUnlock(InsnEvent):
    """Instruction event of kind 'RdUnlock' for the instruction `insn`."""

    def __init__(self, insn):
        # Instruction this event refers to; read back by InsnEvent.as_dict().
        self.insn = insn
|
||||
|
||||
class ReqTimeout(BaseEvent):
    """Event of kind 'ReqTimeout' carrying the value `stamp`."""

    def __init__(self, stamp):
        self.timestamp = stamp

    def as_dict(self):
        """Return the BaseEvent dict plus ``'data'``: the instance attributes
        rendered as a string."""
        record = super().as_dict()
        record['data'] = f"{self.__dict__}"
        return record
|
||||
|
||||
class Timeout(BaseEvent):
    """Event of kind 'Timeout'; it carries no data."""

    def __init__(self):
        """Nothing to initialize."""
|
||||
|
||||
class DelTimeout(BaseEvent):
    """Event of kind 'DelTimeout'; it carries no data."""

    def __init__(self):
        """Nothing to initialize."""
|
||||
|
||||
class BatchStart(BaseEvent):
    """Event of kind 'BatchStart' carrying `pe_id` and `issued`."""

    def __init__(self, pe_id, issued):
        self.pe_id = pe_id
        self.issued = issued

    def as_dict(self):
        """Return the BaseEvent dict plus ``'data'``: the instance attributes
        rendered as a string."""
        record = super().as_dict()
        record['data'] = f"{self.__dict__}"
        return record
|
||||
|
||||
"""
|
||||
A trace event
|
||||
"""
|
||||
class Event:
    """A trace event: a timestamp paired with an event payload.

    `data` must provide an `as_dict()` method.
    """

    def __init__(self, timestamp, data):
        self.timestamp = timestamp
        self.data = data

    def as_dict(self):
        """Return a flat dict: ``'timestamp'`` first, followed by the entries
        of the payload's own `as_dict()`."""
        return {'timestamp': self.timestamp, **self.data.as_dict()}
|
||||
|
||||
"""
|
||||
A simplified instruction
|
||||
"""
|
||||
class Instruction:
    """A simplified instruction: an opcode string plus its arguments.

    Two instructions are interchangeable (same hash, equal) when their
    string forms ``"<opcode> <args>"`` are the same.
    """

    def __init__(self, opcode, args):
        self.opcode = opcode
        self.args = args

    def is_flush(self):
        """True when the opcode ends with ``_F``."""
        return self.opcode.endswith("_F")

    def is_pbs(self):
        """True when the opcode starts with ``PBS``."""
        return self.opcode.startswith("PBS")

    def as_dict(self):
        """Return the instance attribute dict (not a copy)."""
        return self.__dict__

    def __str__(self):
        return f"{self.opcode} {self.args}"

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # Compare the text the hash is derived from rather than the hashes
        # themselves: equal hashes do not imply equal instructions.
        return str(self) == str(other)
|
||||
|
||||
class Batch:
    """A group of instruction statistics handled as one unit.

    `insns` is an indexable sequence of objects exposing `latency` and
    `reltime`. When `latency` is not given, the batch latency defaults to
    the latency of the last instruction (None for an empty batch).
    """

    def __init__(self, insns, latency = None):
        self._insns = insns
        if latency is not None:
            # An explicit value always wins, including 0.
            self.latency = latency
        elif len(insns):
            self.latency = insns[-1].latency
        else:
            self.latency = None

    def reltime(self):
        """Return the largest `reltime` among the instructions of the batch."""
        return max(map(lambda x: x.reltime, self._insns))

    def __len__(self):
        return len(self._insns)

    def __getitem__(self, k):
        return self._insns[k]
|
||||
|
||||
"""
|
||||
Accumulator class for instruction latency
|
||||
"""
|
||||
class Latency:
    """Accumulator for latency samples and the data attached to them."""

    def __init__(self):
        self.acc = []
        self.data = set()

    def append(self, other, data):
        """Record `other.latency` as a sample and add `data` to the data set."""
        self.acc.append(other.latency)
        self.data.add(data)

    def as_dict(self):
        """Return min/avg/max/sum/count of the valid samples plus the data set.

        NaN samples (and None, which becomes NaN in a float array) are
        ignored. When no valid sample is left, the statistics are 'NA' and
        the count is 0.
        """
        samples = np.array(self.acc, dtype=float)
        # NaN never compares equal to anything, itself included, so it has
        # to be detected with isnan rather than with != / ==.
        samples = samples[~np.isnan(samples)]
        if samples.size:
            return {"min": samples.min(), "avg": samples.mean(),
                    "max": samples.max(), "sum": samples.sum(),
                    "count": len(samples), "data": self.data}
        return {"min": 'NA', "avg": 'NA',
                "max": 'NA', "sum": 'NA',
                "count": 0, "data": self.data}
|
||||
|
||||
class InstructionStats:
    """Timing record of one instruction: timestamp, latency, delta, reltime
    and the instruction itself (which may be None)."""

    def __init__(self, insn, latency, timestamp, delta, reltime):
        self.timestamp = timestamp
        self.latency = latency
        self.delta = delta
        self.reltime = reltime
        self.insn = insn

    def as_dict(self):
        """Return the timing fields, extended with the instruction's own
        `as_dict()` entries when an instruction is attached."""
        timing_fields = ('timestamp', 'latency', 'delta', 'reltime')
        record = {name: getattr(self, name) for name in timing_fields}
        if self.insn is None:
            return record
        record.update(self.insn.as_dict())
        return record
|
||||
|
||||
def peek(it: Iterable):
    """Look at the first item of `it` without losing it.

    Returns ``(iterator, first)``: `iterator` yields every item of `it`,
    the first one included, and `first` is that first item, or None when
    `it` is empty.
    """
    passthrough, lookahead = tee(iter(it), 2)
    return passthrough, next(lookahead, None)
|
||||
|
||||
"""
|
||||
Iterable yielding InstructionStats objects when iterated. The statistics are
computed once at construction and stored, so it can be iterated repeatedly.
|
||||
"""
|
||||
class Retired:
    """Statistics of the retired instructions of a trace.

    The InstructionStats are computed once, at construction, and kept in a
    list; iterating the object walks that list.
    """

    # Default `threshold` handed to group_by_time() when building PBS batches.
    BATCH_THRESHOLD = 150000

    def __init__(self, trace: Iterable['Event']):
        self._events = list(self._filter(trace))

    @staticmethod
    def _filter(events: Iterable['Event']):
        """Yield one InstructionStats per Retire event of `events`.

        latency is the Retire timestamp minus the timestamp of the pending
        Issue of the same instruction (NaN when there is none), delta is the
        time since the previously yielded Retire (since the first event of
        the trace for the first one) and reltime is the time since the first
        event of the trace.
        """
        events, first = peek(events)
        if first is None:
            return
        first_stamp = prev_stamp = first.timestamp
        # Issue timestamp of each instruction that has not retired yet.
        pending = {}
        for event in events:
            kind = event.data.__class__
            if kind not in (Issue, Retire):
                continue
            insn = event.data.insn
            timestamp = event.timestamp
            if kind == Issue:
                pending[insn] = timestamp
                continue
            if insn in pending:
                latency = timestamp - pending.pop(insn)
            else:
                # Retired without a recorded Issue: latency is unknown.
                latency = np.nan
            delta = timestamp - prev_stamp
            reltime = timestamp - first_stamp
            yield InstructionStats(insn, latency, timestamp, delta, reltime)
            prev_stamp = timestamp

    def __iter__(self):
        return iter(self._events)

    def to_df(self):
        """Return the statistics as a DataFrame indexed by timestamp."""
        return DataFrame.from_records([x.as_dict() for x in self],
                                      index='timestamp')

    def runtime_us(self, freq_mhz) -> float:
        """Return (last timestamp - first timestamp) / freq_mhz, or 0.0 when
        no instruction was recorded."""
        if not self._events:
            return 0.0
        return (self._events[-1].timestamp - self._events[0].timestamp)/freq_mhz

    def pbs_batches(self, threshold = BATCH_THRESHOLD):
        """Group the statistics whose opcode starts with 'PBS' into batches
        with group_by_time(), and set each batch latency to the difference
        between its reltime and the reltime of the previous batch."""
        pbs = filter(lambda i: i.insn.opcode.startswith('PBS'), self)
        batches = list(map(Batch, group_by_time(pbs, attrgetter('timestamp'), threshold)))
        for batch, latency in zip(batches, delta(x.reltime() for x in batches)):
            batch.latency = latency
        return batches

    def pbs_latency_table(self, freq_mhz = 350, threshold = BATCH_THRESHOLD):
        """Return a DataFrame of batch latency statistics, one row per batch
        size; the min/avg/max/sum columns are divided by `freq_mhz`."""
        pbs_latency_table = defaultdict(Latency, {})
        for batch in self.pbs_batches(threshold):
            pbs_latency_table[len(batch)].append(batch, batch[0].reltime)
        table = {i: x.as_dict() for i,x in pbs_latency_table.items()}
        df = DataFrame.from_dict(table, orient="index")
        clk_cols = ['min', 'avg', 'max', 'sum']
        df.loc[:, clk_cols] = df.loc[:, clk_cols].apply(lambda x: x/freq_mhz)
        df.index.name = 'batch size'
        return df.sort_index()

    def pbs_flushes(self):
        """Yield Batch objects made of the PBS statistics, a batch being
        closed by each PBS whose instruction reports is_flush()."""
        batch = []
        for stat in self:
            # The stats object wraps the instruction; the opcode predicates
            # live on the instruction itself.
            insn = stat.insn
            if insn.is_pbs():
                batch.append(stat)
                if insn.is_flush():
                    yield Batch(batch)
                    batch = []

        if len(batch):
            yield Batch(batch)
|
||||
|
||||
class Issued(Retired):
    """Same container as Retired, but built from the events whose payload
    class is `match_class` (Issue here); no latency is computed."""

    match_class = Issue

    @classmethod
    def _filter(cls, events: Iterable):
        """Yield one InstructionStats, with latency None, per event of
        class `cls.match_class`."""
        events, first = peek(events)
        if first is None:
            return
        origin = first.timestamp
        previous = origin
        for event in events:
            if not (event.data.__class__ == cls.match_class):
                continue
            now = event.timestamp
            yield InstructionStats(event.data.insn, None, now,
                                   now - previous, now - origin)
            previous = now
|
||||
|
||||
class Refilled(Issued):
    """Same statistics as ``Issued``, computed on ``Refill`` events instead."""

    match_class = Refill
|
||||
|
||||
class Trace:
    """An in-memory list of events with DataFrame export."""

    def __init__(self, events: Iterable['Event']):
        # Materialize the iterable so it can be traversed several times.
        self._events = list(events)

    def __iter__(self):
        return iter(self._events)

    def __len__(self):
        return len(self._events)

    def to_df(self):
        """Return a ``DataFrame`` indexed by timestamp.

        Each event is converted with ``as_dict()``.  A ``reltime`` column is
        added, holding the offset of every row from the first index value.
        """
        records = [event.as_dict() for event in self]
        frame = DataFrame.from_records(records, index='timestamp')
        frame['reltime'] = frame.index - frame.index[0]
        return frame
|
||||
110
backends/tfhe-hpu-backend/python/lib/isctrace/fmt.py
Normal file
110
backends/tfhe-hpu-backend/python/lib/isctrace/fmt.py
Normal file
@@ -0,0 +1,110 @@
|
||||
from . import analysis
|
||||
|
||||
class BaseInstruction:
    """Wrap an instruction payload and render it as ``<ClassName> <args>``."""

    def __init__(self, data):
        self.data = data

    def args(self):
        """Return the string form of the wrapped payload."""
        return str(self.data)

    def __str__(self):
        return '{} {}'.format(self.__class__.__name__, self.args())
|
||||
|
||||
class NamedInstruction:
    """Instruction identified by an explicit name with opaque arguments."""

    def __init__(self, name, args):
        self.name = name
        self._args = args

    def args(self):
        """Return the arguments exactly as given at construction."""
        return self._args

    def __str__(self):
        return '{} {}'.format(self.name, self.args())
|
||||
|
||||
class PBS(BaseInstruction):
    """PBS payload, formatted as ``R<dst_rid> R<src_rid> @<gid>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        return 'R{} R{} @{}'.format(self.dst_rid, self.src_rid, self.gid)
|
||||
|
||||
class LD(BaseInstruction):
    """LD payload, formatted as ``R<rid> @<hex address>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        address = hex(self.slot["Addr"])
        return 'R{} @{}'.format(self.rid, address)
|
||||
|
||||
class ST(BaseInstruction):
    """ST payload, formatted as ``@<hex address> R<rid>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        address = hex(self.slot["Addr"])
        return '@{} R{}'.format(address, self.rid)
|
||||
|
||||
class MAC(BaseInstruction):
    """MAC payload, formatted as ``R<dst> R<src0> R<src1> X<mul_factor> ``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        # The trailing space is part of the produced text.
        return 'R{} R{} R{} X{} '.format(
            self.dst_rid, self.src0_rid, self.src1_rid, self.mul_factor)
|
||||
|
||||
class ADD(BaseInstruction):
    """ADD payload, formatted as ``R<dst_rid> R<src0_rid> R<src1_rid>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        return 'R{} R{} R{}'.format(self.dst_rid, self.src0_rid, self.src1_rid)
|
||||
|
||||
class ADDS(BaseInstruction):
    """ADDS payload, formatted as ``R<dst_rid> R<src_rid> <constant>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        constant = self.msg_cst["Cst"]
        return 'R{} R{} {}'.format(self.dst_rid, self.src_rid, constant)
|
||||
|
||||
class SUB(BaseInstruction):
    """SUB payload, formatted as ``R<dst_rid> R<src0_rid> R<src1_rid>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        return 'R{} R{} R{}'.format(self.dst_rid, self.src0_rid, self.src1_rid)
|
||||
|
||||
class SSUB(BaseInstruction):
    """SSUB payload, formatted as ``R<dst_rid> <constant> R<src_rid>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        constant = self.msg_cst["Cst"]
        return 'R{} {} R{}'.format(self.dst_rid, constant, self.src_rid)
|
||||
|
||||
class SUBS(BaseInstruction):
    """SUBS payload, formatted as ``R<dst_rid> R<src_rid> <constant>``."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        constant = self.msg_cst["Cst"]
        return 'R{} R{} {}'.format(self.dst_rid, self.src_rid, constant)
|
||||
|
||||
class SYNC(BaseInstruction):
    """SYNC payload, formatted as its ``sid`` field."""

    def __init__(self, d):
        # The payload dict itself becomes the attribute dictionary (no copy).
        self.__dict__ = d

    def args(self):
        return '{}'.format(self.sid)
|
||||
|
||||
# Opcodes that share the argument layout of another instruction are plain
# aliases of that instruction's formatter class.
PBS_ML2 = PBS
PBS_ML4 = PBS
PBS_ML8 = PBS
PBS_F = PBS
PBS_ML2_F = PBS
PBS_ML4_F = PBS
PBS_ML8_F = PBS
MULS = ADDS
# SUBS is deliberately not aliased here: it has its own class definition
# above, and rebinding the name would shadow that class.
|
||||
|
||||
class Insn:
    """Decode a single-entry ``{opcode: payload}`` mapping into a formatter.

    The opcode selects the module-level ``BaseInstruction`` subclass of the
    same name (including aliases); any other opcode falls back to a
    ``NamedInstruction`` carrying the raw payload.
    """

    def __init__(self, insn):
        # Only the first (expected: only) entry of the mapping is used.
        self.opcode, data = next(iter(insn.items()))

        # Restrict the lookup to instruction formatters: a bare ``in globals()``
        # test would also match imported modules and unrelated classes.
        handler = globals().get(self.opcode)
        if isinstance(handler, type) and issubclass(handler, BaseInstruction):
            self.data = handler(data)
        else:
            self.data = NamedInstruction(self.opcode, data)

    def to_analysis(self):
        """Return the ``analysis.Instruction`` built from opcode and formatted args."""
        return analysis.Instruction(self.opcode, self.data.args())
|
||||
83
backends/tfhe-hpu-backend/python/lib/isctrace/hw.py
Normal file
83
backends/tfhe-hpu-backend/python/lib/isctrace/hw.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from itertools import accumulate, chain, islice, tee
|
||||
from operator import attrgetter
|
||||
from typing import Iterator
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
from pandas import DataFrame
|
||||
|
||||
from . import analysis, fmt
|
||||
|
||||
class Event:
    """
    A trace event
    """

    # Maps the trace ``cmd`` field to a factory building the analysis payload.
    EVENT_MAP = {
        "ISSUE": lambda x: analysis.Issue(fmt.Insn(x.insn).to_analysis()),
        "RETIRE": lambda x: analysis.Retire(fmt.Insn(x.insn).to_analysis()),
        "RDUNLOCK": lambda x: analysis.RdUnlock(fmt.Insn(x.insn).to_analysis()),
        "REFILL": lambda x: analysis.Refill(None),
    }

    def __init__(self, trace_dict):
        """Build an event from one decoded trace record.

        Raises ``KeyError`` when ``cmd``, ``insn_asm``, ``timestamp``,
        ``insn`` or ``state.sync_id`` is missing from ``trace_dict``.
        """
        self.cmd = trace_dict['cmd']
        self.insn_asm = trace_dict['insn_asm']
        self.timestamp = trace_dict['timestamp']
        self.insn = trace_dict['insn']
        self.sync_id = trace_dict['state']['sync_id']

    def as_dict(self):
        """Return the live attribute dictionary of this event (not a copy)."""
        return self.__dict__

    @staticmethod
    def default():
        """Return a placeholder event with command ``NONE`` at timestamp 0.

        The record carries every key required by ``__init__``; the
        instruction and the sync id are ``None``.
        """
        return Event({
            "cmd": "NONE",
            "insn_asm": "",
            "timestamp": 0,
            "insn": None,
            "state": {"sync_id": None},
        })

    def to_analysis(self) -> 'analysis.Event':
        """Convert to an ``analysis.Event``.

        Raises ``KeyError`` when ``cmd`` has no entry in ``EVENT_MAP``.
        """
        return analysis.Event(
            timestamp=self.timestamp,
            data=self.EVENT_MAP[self.cmd](self))
|
||||
|
||||
|
||||
class Trace:
    """
    A collection of hardware events
    """

    def __init__(self, events):
        self._events = events

    @staticmethod
    def from_json(filename):
        """Load a trace from a JSON file holding a list of event records."""
        with open(filename, 'r') as fd:
            return Trace([Event(x) for x in json.load(fd)])

    def __iter__(self):
        return iter(self._events)

    def __len__(self):
        return len(self._events)

    # Tries to split the event stream on IOP boundaries
    def iops(self):
        """Yield one ``Trace`` per completed IOP.

        Events are grouped by ``sync_id``; a group is emitted (and forgotten)
        when an event whose instruction opcode is ``SYNC`` is seen for it.
        A warning is logged if some groups never saw their ``SYNC``.
        """
        id_map = defaultdict(list, {})
        for event in self:
            id_map[event.sync_id].append(event)
            # Events without an instruction cannot close an IOP.
            opcode = next(iter(event.insn.keys())) if event.insn is not None else None

            if opcode == "SYNC":
                yield Trace(id_map[event.sync_id])
                del id_map[event.sync_id]

        if len(id_map):
            # logging.warn is a deprecated alias; logging.warning is the supported call.
            logging.warning("The trace contains incomplete IOPs")

    def to_analysis(self) -> 'analysis.Trace':
        """Convert every event and wrap the result in an ``analysis.Trace``."""
        return analysis.Trace(x.to_analysis() for x in self)
|
||||
|
||||
def from_hw(filename) -> 'list[analysis.Trace]':
    """Load a hardware trace JSON file and return one analysis trace per IOP.

    The result is a list: one converted trace for each IOP produced by
    ``Trace.iops()`` on the loaded file.
    """
    return [iop.to_analysis() for iop in Trace.from_json(filename).iops()]
|
||||
|
||||
# Register from_hw as a factory function directly on the analysis.Trace class
|
||||
setattr(analysis.Trace, 'from_hw', from_hw)
|
||||
93
backends/tfhe-hpu-backend/python/lib/isctrace/mockup.py
Normal file
93
backends/tfhe-hpu-backend/python/lib/isctrace/mockup.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# A Library to load mockup traces
|
||||
import json
|
||||
|
||||
import pandas
|
||||
|
||||
from . import analysis, fmt
|
||||
|
||||
|
||||
class ArgId:
    """Expose the keys of a dict as attributes."""

    def __init__(self, d):
        # The given dict is adopted as the attribute dictionary (no copy).
        self.__dict__ = d
|
||||
|
||||
class Instruction:
    """Instruction record of a mockup trace slot."""

    def __init__(self, d):
        # Copy every field of the record onto the instance first.
        self.__dict__.update(d)
        # Then wrap the three operand descriptors for attribute access.
        for field in ('dst_id', 'srca_id', 'srcb_id'):
            setattr(self, field, ArgId(getattr(self, field)))
        self.insn = fmt.Insn(d['op'])

    def __str__(self):
        return str(self.insn)
|
||||
|
||||
class Slot:
    """A slot record: the decoded instruction plus its raw ``state`` field."""

    def __init__(self, d):
        self.insn_data = Instruction(d['inst'])
        self.state = d['state']

    def __str__(self):
        return str(self.insn_data)

    def to_analysis(self):
        """Return the analysis form of the slot's instruction."""
        decoded = self.insn_data.insn
        return decoded.to_analysis()
|
||||
|
||||
# The only two subtypes
|
||||
class Query:
    """Event payload carrying a slot and a ``cmd`` naming its analysis type."""

    def __init__(self, event):
        self.__dict__.update(event)
        slot = Slot(self.slot)
        self.slot = slot
        self.subtype = self.cmd
        self.desc = str(slot)

    def to_analysis(self):
        """Build the ``analysis`` object named by ``subtype`` from the slot."""
        factory = getattr(analysis, self.subtype)
        return factory(self.slot.to_analysis())
|
||||
|
||||
class ReqTimeout:
    """Event payload holding a single timestamp value."""

    def __init__(self, timestamp):
        self.timestamp = timestamp

    def to_analysis(self):
        stamp = self.timestamp
        return analysis.ReqTimeout(stamp)
|
||||
|
||||
class BatchStart:
    """Event payload built from the ``pe_id`` and ``issued`` fields of a record."""

    def __init__(self, d):
        self.pe_id, self.issued = d['pe_id'], d['issued']

    def to_analysis(self):
        return analysis.BatchStart(self.pe_id, self.issued)
|
||||
|
||||
class NamedEvent:
    """Event payload identified only by the name of its analysis class."""

    def __init__(self, name):
        self.name = name

    def to_analysis(self):
        """Instantiate, without arguments, the ``analysis`` attribute called ``name``."""
        factory = getattr(analysis, self.name)
        return factory()
|
||||
|
||||
class Event:
    """One record of a mockup trace: a timestamp and a decoded payload."""

    def __init__(self, trace_dict):
        self.timestamp = trace_dict['timestamp']
        raw = trace_dict['event']

        if raw.__class__ == dict:
            # Single-entry mapping: the key names the module-level payload
            # class, the value is its constructor argument.
            kind, payload = next(iter(raw.items()))
            self.event = globals()[kind](payload)
        else:
            # Anything else is taken as the bare name of an event.
            self.event = NamedEvent(raw)

    def to_analysis(self):
        """Convert to an ``analysis.Event`` keeping the timestamp."""
        return analysis.Event(
            timestamp=self.timestamp,
            data=self.event.to_analysis())
|
||||
|
||||
class Trace:
    """Mockup trace loaded from a JSON file holding a list of event records."""

    def __init__(self, jsonfile):
        with open(jsonfile, 'r') as fd:
            self.traces = [Event(record) for record in json.load(fd)]

    def __iter__(self):
        return iter(self.traces)

    def to_analysis(self):
        """Convert every event and wrap the result in an ``analysis.Trace``."""
        converted = (event.to_analysis() for event in self)
        return analysis.Trace(converted)
|
||||
|
||||
def from_mockup(filename: str) -> 'analysis.Trace':
    """Load the mockup trace stored in ``filename`` and convert it for analysis."""
    mockup_trace = Trace(filename)
    return mockup_trace.to_analysis()
|
||||
|
||||
# Register from_mockup as a factory function directly on the analysis.Trace class
|
||||
setattr(analysis.Trace, 'from_mockup', from_mockup)
|
||||
3
backends/tfhe-hpu-backend/python/requirements.txt
Normal file
3
backends/tfhe-hpu-backend/python/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
pandas == 2.2.3
|
||||
numpy == 1.26.3
|
||||
# logging is part of the Python standard library; it must not be installed from PyPI
|
||||
519
backends/tfhe-hpu-backend/src/asm/dop/arg.rs
Normal file
519
backends/tfhe-hpu-backend/src/asm/dop/arg.rs
Normal file
@@ -0,0 +1,519 @@
|
||||
//!
|
||||
//! Gather DOp argument in a common type
|
||||
//! Provides a FromStr implementation for parsing
|
||||
|
||||
use crate::asm::CtId;
|
||||
|
||||
use super::field::{ImmId, MemId, RegId, SyncId};
|
||||
use super::*;
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
/// Minimum asm arg width to have aligned field
|
||||
pub const ARG_MIN_WIDTH: usize = 16;
|
||||
pub const DOP_MIN_WIDTH: usize = 10;
|
||||
|
||||
/// Generic arguments
|
||||
/// Used to pack argument under the same type
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Arg {
|
||||
Reg(RegId),
|
||||
Mem(MemId),
|
||||
Imm(ImmId),
|
||||
Pbs(Pbs),
|
||||
Sync(SyncId),
|
||||
}
|
||||
|
||||
/// Use Display trait to convert into asm human readable file
|
||||
/// Simply defer to inner type display impl while forcing the display width
|
||||
impl std::fmt::Display for Arg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Arg::Reg(inner) => write!(f, "{inner: <ARG_MIN_WIDTH$}"),
|
||||
Arg::Mem(inner) => write!(f, "{inner: <ARG_MIN_WIDTH$}"),
|
||||
Arg::Imm(inner) => write!(f, "{inner: <ARG_MIN_WIDTH$}"),
|
||||
Arg::Pbs(inner) => write!(f, "{inner: <ARG_MIN_WIDTH$}"),
|
||||
Arg::Sync(inner) => write!(f, "{inner: <ARG_MIN_WIDTH$}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parsing error
|
||||
#[derive(thiserror::Error, Debug, Clone)]
|
||||
pub enum ParsingError {
|
||||
#[error("Unmatch Asm Operation: {0}")]
|
||||
Unmatch(String),
|
||||
#[error("Invalid arguments number: expect {0}, get {1}")]
|
||||
ArgNumber(usize, usize),
|
||||
#[error("Invalid arguments type: expect {0}, get {1}")]
|
||||
ArgType(String, Arg),
|
||||
#[error("Invalid arguments: {0}")]
|
||||
InvalidArg(String),
|
||||
#[error("Empty line")]
|
||||
Empty,
|
||||
}
|
||||
|
||||
/// Use FromStr trait to decode from asm file
|
||||
impl std::str::FromStr for Arg {
|
||||
type Err = ParsingError;
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref DOP_ARG_RE: regex::Regex = regex::Regex::new(
|
||||
r"(?<register>^R(?<rid>[0-9]+))|(?<mem_addr>^@((?<hex_cid>0x[0-9a-fA-F]+)|(?<cid>[0-9]+)))|(?<mem_tmpl>^(?<mt_orig>TS|TD|TH)(\[(?<mt_id>\d+)\])*\.(?<mt_bid>\d+))|(?<imm_cst>^((?<hex_cst>0x[0-9a-fA-F]+)|(?<cst>[0-9]+)))|(?<imm_var>^TI\[(?<it_id>\d+)\]\.(?<it_bid>\d+))|(?<pbs>^Pbs(?<pbs_name>(\S+)))"
|
||||
)
|
||||
.expect("Invalid regex");
|
||||
}
|
||||
|
||||
if let Some(caps) = DOP_ARG_RE.captures(s) {
|
||||
if let Some(_register) = caps.name("register") {
|
||||
let rid = caps["rid"]
|
||||
.parse::<u8>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
Ok(Arg::Reg(RegId(rid)))
|
||||
} else if let Some(_mem_addr) = caps.name("mem_addr") {
|
||||
let cid = if let Some(raw_cid) = caps.name("cid") {
|
||||
raw_cid
|
||||
.as_str()
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
} else {
|
||||
// One of them must match, otherwise error will be arose before
|
||||
let raw_hex_cid = caps.name("hex_cid").unwrap();
|
||||
u16::from_str_radix(&raw_hex_cid.as_str()[2..], 16)
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
};
|
||||
Ok(Arg::Mem(MemId::Addr(CtId(cid))))
|
||||
} else if let Some(_mem_tmpl) = caps.name("mem_tmpl") {
|
||||
let tid = if let Some(raw_tid) = caps.name("mt_id") {
|
||||
Some(
|
||||
raw_tid
|
||||
.as_str()
|
||||
.parse::<u8>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let bid = caps["mt_bid"]
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
|
||||
match &caps["mt_orig"] {
|
||||
"TS" => {
|
||||
if tid.is_none() {
|
||||
return Err(ParsingError::InvalidArg(format!("Memory template Src must have following format `TS[tid].bid` (parsed {})",&caps["mem_tmpl"])));
|
||||
}
|
||||
Ok(Arg::Mem(MemId::Src {
|
||||
tid: tid.unwrap(),
|
||||
bid: bid as u8,
|
||||
}))
|
||||
}
|
||||
"TD" => {
|
||||
if tid.is_none() {
|
||||
return Err(ParsingError::InvalidArg(format!("Memory template Dst must have following format `TD[tid].bid` (parsed {})",&caps["mem_tmpl"])));
|
||||
}
|
||||
Ok(Arg::Mem(MemId::Dst {
|
||||
tid: tid.unwrap(),
|
||||
bid: bid as u8,
|
||||
}))
|
||||
}
|
||||
"TH" => {
|
||||
if tid.is_some() {
|
||||
return Err(ParsingError::InvalidArg(format!("Memory template Heap must have following format `TH.bid` (parsed {})",&caps["mem_tmpl"])));
|
||||
}
|
||||
Ok(Arg::Mem(MemId::Heap { bid }))
|
||||
}
|
||||
_ => Err(ParsingError::InvalidArg(format!(
|
||||
"Invalid memory template argument {}",
|
||||
&caps["mem_tmpl"]
|
||||
))),
|
||||
}
|
||||
} else if let Some(_imm_cst) = caps.name("imm_cst") {
|
||||
let cst = if let Some(raw_cst) = caps.name("cst") {
|
||||
raw_cst
|
||||
.as_str()
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
} else {
|
||||
// One of them must match, otherwise error will be arose before
|
||||
let raw_hex_cst = caps.name("hex_cst").unwrap();
|
||||
u16::from_str_radix(&raw_hex_cst.as_str()[2..], 16)
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
};
|
||||
Ok(Arg::Imm(ImmId::Cst(cst)))
|
||||
} else if let Some(_imm_var) = caps.name("imm_var") {
|
||||
let tid = caps["it_id"]
|
||||
.parse::<u8>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
let bid = caps["it_bid"]
|
||||
.parse::<u8>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
Ok(Arg::Imm(ImmId::Var { tid, bid }))
|
||||
} else if let Some(_pbs) = caps.name("pbs") {
|
||||
Ok(Arg::Pbs(Pbs::from_str(&caps["pbs_name"])?))
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)))
|
||||
}
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait FromAsm
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
fn from_args(opcode: u8, args: &[arg::Arg]) -> Result<Self, ParsingError>;
|
||||
}
|
||||
|
||||
#[enum_dispatch]
|
||||
pub trait ToAsm
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
fn name(&self) -> &'static str {
|
||||
std::any::type_name_of_val(self)
|
||||
}
|
||||
|
||||
fn args(&self) -> Vec<arg::Arg> {
|
||||
let mut arg = self.dst();
|
||||
arg.extend_from_slice(self.src().as_slice());
|
||||
arg
|
||||
}
|
||||
fn dst(&self) -> Vec<arg::Arg>;
|
||||
fn src(&self) -> Vec<arg::Arg>;
|
||||
}
|
||||
|
||||
#[enum_dispatch]
|
||||
pub trait IsFlush
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
fn is_flush(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an operation into its flush flavor.
pub trait ToFlush
where
    Self: Sized + Clone,
{
    /// Defaults to an unchanged copy of `self`.
    fn to_flush(&self) -> Self {
        self.clone()
    }
}
|
||||
|
||||
impl FromAsm for field::PeArithInsn {
|
||||
fn from_args(opcode: u8, args: &[arg::Arg]) -> Result<Self, ParsingError> {
|
||||
if (args.len() != 3) && (args.len() != 4) {
|
||||
return Err(ParsingError::ArgNumber(3, args.len()));
|
||||
}
|
||||
|
||||
let dst_rid = match args[0] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[0].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let src0_rid = match args[1] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[1].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let src1_rid = match args[2] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[2].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let mul_factor = if let Some(arg) = args.get(3) {
|
||||
match arg {
|
||||
Arg::Imm(ImmId::Cst(id)) => MulFactor(*id as u8),
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Imm::Cst".to_string(),
|
||||
args[3].clone(),
|
||||
))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MulFactor(0)
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
opcode: Opcode::from(opcode),
|
||||
mul_factor,
|
||||
src1_rid,
|
||||
src0_rid,
|
||||
dst_rid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToAsm for PeArithInsn {
|
||||
fn dst(&self) -> Vec<arg::Arg> {
|
||||
vec![arg::Arg::Reg(self.dst_rid)]
|
||||
}
|
||||
fn src(&self) -> Vec<arg::Arg> {
|
||||
let mut src = vec![arg::Arg::Reg(self.src0_rid), arg::Arg::Reg(self.src1_rid)];
|
||||
if self.mul_factor != MulFactor(0) {
|
||||
src.push(arg::Arg::Imm(ImmId::Cst(self.mul_factor.0 as u16)));
|
||||
}
|
||||
src
|
||||
}
|
||||
}
|
||||
|
||||
impl FromAsm for field::PeArithMsgInsn {
|
||||
fn from_args(opcode: u8, args: &[arg::Arg]) -> Result<Self, ParsingError> {
|
||||
if args.len() != 3 {
|
||||
return Err(ParsingError::ArgNumber(3, args.len()));
|
||||
}
|
||||
|
||||
let dst_rid = match args[0] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[0].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let src_rid = match args[1] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[1].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let msg_cst = match args[2] {
|
||||
Arg::Imm(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Imm".to_string(),
|
||||
args[2].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
opcode: Opcode::from(opcode),
|
||||
msg_cst,
|
||||
src_rid,
|
||||
dst_rid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToAsm for PeArithMsgInsn {
|
||||
fn dst(&self) -> Vec<arg::Arg> {
|
||||
vec![arg::Arg::Reg(self.dst_rid)]
|
||||
}
|
||||
fn src(&self) -> Vec<arg::Arg> {
|
||||
vec![arg::Arg::Reg(self.src_rid), arg::Arg::Imm(self.msg_cst)]
|
||||
}
|
||||
}
|
||||
|
||||
impl FromAsm for field::PeMemInsn {
|
||||
fn from_args(opcode: u8, args: &[arg::Arg]) -> Result<Self, ParsingError> {
|
||||
if args.len() != 2 {
|
||||
return Err(ParsingError::ArgNumber(2, args.len()));
|
||||
}
|
||||
|
||||
let (rid, mid) = match opcode {
|
||||
_x if _x == u8::from(opcode::Opcode::LD()) => {
|
||||
let rid = match args[0] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[0].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let slot = match args[1] {
|
||||
Arg::Mem(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Mem".to_string(),
|
||||
args[1].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
(rid, slot)
|
||||
}
|
||||
_x if _x == u8::from(opcode::Opcode::ST()) => {
|
||||
let slot = match args[0] {
|
||||
Arg::Mem(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Mem".to_string(),
|
||||
args[0].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let rid = match args[1] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[1].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
(rid, slot)
|
||||
}
|
||||
_ => {
|
||||
return Err(ParsingError::Unmatch(
|
||||
"PeMemInsn expect LD/ST opcode".to_string(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
opcode: Opcode::from(opcode),
|
||||
slot: mid,
|
||||
rid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToAsm for PeMemInsn {
|
||||
fn dst(&self) -> Vec<arg::Arg> {
|
||||
match self.opcode {
|
||||
_x if _x == opcode::Opcode::LD() => vec![Arg::Reg(self.rid)],
|
||||
_x if _x == opcode::Opcode::ST() => vec![Arg::Mem(self.slot)],
|
||||
_ => panic!("Unsupported opcode for PeMemInsn"),
|
||||
}
|
||||
}
|
||||
fn src(&self) -> Vec<arg::Arg> {
|
||||
match self.opcode {
|
||||
_x if _x == opcode::Opcode::LD() => vec![Arg::Mem(self.slot)],
|
||||
_x if _x == opcode::Opcode::ST() => vec![Arg::Reg(self.rid)],
|
||||
_ => panic!("Unsupported opcode for PeMemInsn"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromAsm for field::PePbsInsn {
|
||||
fn from_args(opcode: u8, args: &[arg::Arg]) -> Result<Self, ParsingError> {
|
||||
if args.len() != 3 {
|
||||
return Err(ParsingError::ArgNumber(3, args.len()));
|
||||
}
|
||||
|
||||
let dst_rid = match args[0] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[0].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let src_rid = match args[1] {
|
||||
Arg::Reg(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Reg".to_string(),
|
||||
args[1].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let pbs_lut = match &args[2] {
|
||||
Arg::Pbs(id) => id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Pbs".to_string(),
|
||||
args[2].clone(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
opcode: Opcode::from(opcode),
|
||||
gid: pbs_lut.gid(),
|
||||
src_rid,
|
||||
dst_rid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToAsm for PePbsInsn {
|
||||
fn dst(&self) -> Vec<arg::Arg> {
|
||||
vec![Arg::Reg(self.dst_rid)]
|
||||
}
|
||||
fn src(&self) -> Vec<arg::Arg> {
|
||||
vec![
|
||||
Arg::Reg(self.src_rid),
|
||||
Arg::Pbs(Pbs::from_hex(self.gid).unwrap()),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
impl FromAsm for field::PeSyncInsn {
|
||||
fn from_args(opcode: u8, args: &[arg::Arg]) -> Result<Self, ParsingError> {
|
||||
if (args.len() != 1) && (!args.is_empty()) {
|
||||
return Err(ParsingError::ArgNumber(1, args.len()));
|
||||
}
|
||||
|
||||
let sid = if let Some(arg) = args.get(1) {
|
||||
match arg {
|
||||
Arg::Sync(id) => *id,
|
||||
_ => {
|
||||
return Err(ParsingError::ArgType(
|
||||
"Arg::Sync".to_string(),
|
||||
args[1].clone(),
|
||||
))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SyncId(0)
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
opcode: Opcode::from(opcode),
|
||||
sid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToAsm for PeSyncInsn {
|
||||
fn dst(&self) -> Vec<arg::Arg> {
|
||||
vec![]
|
||||
}
|
||||
fn src(&self) -> Vec<arg::Arg> {
|
||||
vec![Arg::Sync(self.sid)]
|
||||
}
|
||||
}
|
||||
|
||||
impl ToFlush for field::PePbsInsn {
    /// Same instruction with its opcode replaced by `opcode.to_flush()`.
    fn to_flush(&self) -> Self {
        Self {
            opcode: self.opcode.to_flush(),
            gid: self.gid,
            src_rid: self.src_rid,
            dst_rid: self.dst_rid,
        }
    }
}
|
||||
impl ToFlush for field::PeSyncInsn {}
|
||||
impl ToFlush for field::PeArithInsn {}
|
||||
impl ToFlush for field::PeArithMsgInsn {}
|
||||
impl ToFlush for field::PeMemInsn {}
|
||||
388
backends/tfhe-hpu-backend/src/asm/dop/dop_macro.rs
Normal file
388
backends/tfhe-hpu-backend/src/asm/dop/dop_macro.rs
Normal file
@@ -0,0 +1,388 @@
|
||||
//! DOp definitions are repetitive.
//!
//! Apart from their behavior, DOps share a small set of formats,
//! and for a given format all the parsing logic is the same.
//! `macro_rules!` macros are therefore used to generate the DOp definitions.
|
||||
|
||||
/// Generate the asm/hex conversions of one DOp.
///
/// Parameters:
/// * `$asm`: asm name, used to derive the `DOp<Name>` type and the `DOp`
///   variant identifiers
/// * `$opcode`: opcode expression
/// * `$field`: field type wrapped by the DOp
/// * `$fmt`: bitfield type used for the hex conversions
#[macro_export]
macro_rules! impl_dop_parser {
    (
        $asm: literal,
        $opcode: expr,
        $field: ty,
        $fmt: ty
        $(,)?
    ) => {
        ::paste::paste! {
            impl [<DOp $asm:camel>] {
                // Build the DOp from parsed asm arguments.
                fn from_args(args: &[arg::Arg]) -> Result<DOp, ParsingError> {
                    let fmt_op = $field::from_args($opcode.into(), args)?;
                    Ok(DOp::[< $asm:upper >](Self(fmt_op)))
                }

                // Build the DOp from its hex word.
                fn from_hex(hex: DOpRepr) -> DOp {
                    DOp::[< $asm:upper >](Self($field::from(&$fmt::from_bits(hex))))
                }

                // Opcode associated with this DOp.
                pub fn opcode() -> u8 {
                    $opcode.into()
                }
            }

            // Asm view: the name is the asm literal, arguments are deferred
            // to the wrapped field type.
            impl ToAsm for [<DOp $asm:camel>]{
                fn name(&self) -> &'static str {
                    $asm
                }
                fn args(&self) -> Vec<arg::Arg> {
                    self.0.args()
                }
                fn dst(&self) -> Vec<arg::Arg> {
                    self.0.dst()
                }
                fn src(&self) -> Vec<arg::Arg> {
                    self.0.src()
                }
            }

            // Hex view: goes through the bitfield type.
            impl ToHex for [<DOp $asm:camel>] {
                fn to_hex(&self) -> DOpRepr {
                    $fmt::from(&self.0).into_bits()
                }
            }
        }
    };
}
|
||||
|
||||
/// Generate the concrete `DOp<Name>` type of one DOp.
///
/// Each arm is selected by the format token given as third parameter and
/// emits: the wrapper struct, its `new` constructor (plus accessors where
/// relevant), the `IsFlush` impl and, through `impl_dop_parser!`, the
/// asm/hex conversions.
#[macro_export]
macro_rules! impl_dop {
    // Arith operations ---------------------------------------------------------------------------
    (
        $asm: literal,
        $opcode: expr,
        PeArithInsn
        $(,)?
    ) => {
        ::paste::paste! {
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            pub struct [<DOp $asm:camel>](pub PeArithInsn);

            impl [<DOp $asm:camel>] {
                // The multiplication factor is not exposed and fixed to 0.
                pub fn new(dst: RegId, src0: RegId, src1: RegId) -> Self {
                    Self(PeArithInsn {
                        opcode: $opcode,
                        mul_factor: MulFactor(0),
                        src1_rid: src1,
                        src0_rid: src0,
                        dst_rid: dst,
                    })
                }
            }

            impl IsFlush for [<DOp $asm:camel>]{}
            impl_dop_parser!($asm, $opcode, PeArithInsn, PeArithHex);
        }
    };
    // Arith operations with mult_factor ----------------------------------------------------------
    (
        $asm: literal,
        $opcode: expr,
        PeArithInsn_mul_factor
        $(,)?
    ) => {
        ::paste::paste! {
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            pub struct [<DOp $asm:camel>](pub PeArithInsn);

            impl [<DOp $asm:camel>] {
                pub fn new(dst_rid: RegId, src0_rid: RegId, src1_rid: RegId, mul_factor: MulFactor) -> Self {
                    Self(PeArithInsn {
                        opcode: $opcode,
                        mul_factor,
                        src1_rid,
                        src0_rid,
                        dst_rid,
                    })
                }
            }

            impl IsFlush for [<DOp $asm:camel>] {}
            impl_dop_parser!($asm, $opcode, PeArithInsn, PeArithHex);
        }
    };
    // ArithMsg operations ------------------------------------------------------------------------
    (
        $asm: literal,
        $opcode: expr,
        PeArithMsgInsn
        $(,)?
    ) => {
        ::paste::paste! {
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            pub struct [<DOp $asm:camel>](pub PeArithMsgInsn);

            impl [<DOp $asm:camel>] {
                pub fn new(dst_rid: RegId, src_rid: RegId, msg_cst: ImmId) -> Self {
                    Self(PeArithMsgInsn {
                        opcode: $opcode,
                        msg_cst,
                        src_rid,
                        dst_rid,
                    })
                }
                /// Access inner imm for template patching
                pub fn msg_mut(&mut self) -> &mut ImmId {
                    &mut self.0.msg_cst
                }
            }

            impl IsFlush for [<DOp $asm:camel>]{}
            impl_dop_parser!($asm, $opcode, PeArithMsgInsn, PeArithMsgHex);
        }
    };

    // Mem operations ------------------------------------------------------------------------
    // Load flavor: constructor takes the register first
    (
        $asm: literal,
        $opcode: expr,
        PeMemInsn_ld
        $(,)?
    ) => {
        ::paste::paste! {
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            pub struct [<DOp $asm:camel>](pub PeMemInsn);

            impl [<DOp $asm:camel>] {
                pub fn new(rid: RegId, mid: MemId) -> Self {
                    Self(PeMemInsn {
                        opcode: $opcode,
                        slot: mid,
                        rid,
                    })
                }
                /// Access inner rid
                pub fn rid(&self) -> &RegId {
                    &self.0.rid
                }
                /// Access inner memory slot
                pub fn slot(&self) -> &MemId {
                    &self.0.slot
                }
                /// Access inner memory for template patching
                pub fn slot_mut(&mut self) -> &mut MemId {
                    &mut self.0.slot
                }
            }

            impl IsFlush for [<DOp $asm:camel>]{}
            impl_dop_parser!($asm, $opcode, PeMemInsn, PeMemHex);
        }
    };

    // Store flavor: constructor takes the memory slot first
    (
        $asm: literal,
        $opcode: expr,
        PeMemInsn_st
        $(,)?
    ) => {
        ::paste::paste! {
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            pub struct [<DOp $asm:camel>](pub PeMemInsn);

            impl [<DOp $asm:camel>] {
                pub fn new( mid: MemId, rid: RegId) -> Self {
                    Self(PeMemInsn {
                        opcode: $opcode,
                        slot: mid,
                        rid,
                    })
                }
                /// Access inner rid
                pub fn rid(&self) -> &RegId {
                    &self.0.rid
                }
                /// Access inner memory slot
                pub fn slot(&self) -> &MemId {
                    &self.0.slot
                }
                /// Access inner memory for template patching
                pub fn slot_mut(&mut self) -> &mut MemId {
                    &mut self.0.slot
                }
            }

            impl IsFlush for [<DOp $asm:camel>]{}
            impl_dop_parser!($asm, $opcode, PeMemInsn, PeMemHex);
        }
    };

    // Pbs operations ------------------------------------------------------------------------
    (
        $asm: literal,
        $opcode: expr,
        PePbsInsn
        $(,)?
    ) => {
        ::paste::paste! {
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            pub struct [<DOp $asm:camel>](pub PePbsInsn);

            impl [<DOp $asm:camel>] {
                pub fn new(dst_rid: RegId, src_rid: RegId, gid: PbsGid) -> Self {
                    Self(PePbsInsn {
                        opcode: $opcode,
                        gid,
                        src_rid,
                        dst_rid,
                    })
                }
            }

            // Only Pbs operations can be flushes: defer to the opcode.
            impl IsFlush for [<DOp $asm:camel>] {
                fn is_flush(&self) -> bool {
                    $opcode.is_flush()
                }
            }
            impl_dop_parser!($asm, $opcode, PePbsInsn, PePbsHex);
        }
    };

    // Sync operations ------------------------------------------------------------------------
    (
        $asm: literal,
        $opcode: expr,
        PeSyncInsn
        $(,)?
    ) => {
        ::paste::paste! {
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            pub struct [<DOp $asm:camel>](pub PeSyncInsn);

            impl [<DOp $asm:camel>] {
                // A missing sync id defaults to 0.
                pub fn new(sid: Option<SyncId>) -> Self {
                    Self(PeSyncInsn {
                        opcode: $opcode,
                        sid: sid.unwrap_or(SyncId(0))
                    })
                }

            }

            impl IsFlush for [<DOp $asm:camel>]{}
            impl_dop_parser!($asm, $opcode, PeSyncInsn, PeSyncHex);
        }
    };
}
|
||||
|
||||
/// Generate the whole DOp machinery from a list of instruction descriptions.
///
/// Each entry is written `[asm_name, opcode, InsnType {fmt}?, flush_suffix?]` and entries may be
/// separated by an optional comma. For every entry the macro:
/// * invokes `impl_dop!` with the instruction type (suffixed by `_<fmt>` when `{fmt}` is given),
///   which defines the concrete `DOp<AsmName>` wrapper,
/// * adds a variant named after the upper-cased asm name to the `DOp` enumeration,
/// * adds a `ToFlush` arm mapping the variant to the variant whose name is extended with
///   `flush_suffix` (or to itself when no suffix is given),
/// * registers the asm name and the opcode in the `DOP_LUT` lookup tables used by the parsers.
#[macro_export]
macro_rules! dop {
    (
        $([$asm: literal, $opcode: expr, $type: ty $({$fmt: tt})? $(,$flush: literal)?] $(,)?)*
    ) => {
        ::paste::paste! {
            // Signature of the per-instruction parsers stored in `DOP_LUT`
            type AsmCallback = fn(&[arg::Arg]) -> Result<DOp, ParsingError>;
            type HexCallback = fn(DOpRepr) -> DOp;

            $(
                impl_dop!($asm, $opcode, [< $type $(_ $fmt)? >]);
            )*

            /// Aggregate DOp concrete type in one enumeration
            #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
            #[enum_dispatch(ToAsm, ToHex, IsFlush)]
            #[allow(non_camel_case_types)]
            pub enum DOp{
                $([< $asm:upper >]([< DOp $asm:camel>]),)*
            }

            impl ToFlush for DOp {
                /// Build the flush counterpart of the instruction.
                /// Entries declared without a flush suffix are mapped onto their own variant.
                fn to_flush(&self) -> Self {
                    match self {
                        $(
                            DOp::[< $asm:upper >](inner) => DOp::[< $asm:upper $($flush)?>]
                                ([< DOp $asm:camel $($flush:camel)? >](inner.0.to_flush())),
                        )*
                    }
                }
            }

            impl DOp {
                /// Construct DOp from its asm name and already parsed arguments
                ///
                /// Returns `ParsingError::Unmatch` when `name` isn't a registered asm name.
                pub fn from_args(name: &str, args: &[arg::Arg]) -> Result<Self, ParsingError> {
                    match DOP_LUT.asm.get(name) {
                        Some(cb) => cb(args),
                        None => Err(ParsingError::Unmatch(format!("{name} unknown"))),
                    }
                }

                /// Construct DOp from hex word
                ///
                /// The opcode field is extracted through `DOpRawHex` and used to select the
                /// concrete parser. Returns `ParsingError::Unmatch` for unregistered opcode.
                pub fn from_hex(hex: DOpRepr) -> Result<Self, ParsingError> {
                    let raw = DOpRawHex::from_bits(hex);
                    match DOP_LUT.hex.get(&raw.opcode()) {
                        Some(cb) => Ok(cb(hex)),
                        None => Err(ParsingError::Unmatch(format!("DOp {:x?} unknown [hex {:x}]", raw.opcode(), hex))),
                    }
                }
            }

            impl std::fmt::Display for DOp {
                /// Write the left-aligned name followed by each left-aligned argument.
                fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                    write!(f, "{:<width$}", self.name(), width= arg::DOP_MIN_WIDTH)?;
                    for arg in self.args().iter() {
                        // Convert to String first so that the padding is applied on the
                        // rendered argument
                        write!(f, "{:<width$} ", arg.to_string(), width = arg::ARG_MIN_WIDTH)?
                    }
                    Ok(())
                }
            }

            /// Construct DOp from ASM str
            impl std::str::FromStr for DOp {
                type Err = ParsingError;

                /// First whitespace-separated token is the DOp name, the remaining ones are
                /// its arguments. An empty (or blank) string yields `ParsingError::Empty`.
                fn from_str(asm: &str) -> Result<Self, Self::Err> {
                    let mut tokens = asm.split_whitespace();
                    let name = match tokens.next() {
                        Some(name) => name,
                        None => return Err(ParsingError::Empty),
                    };

                    // Stop at the first argument that fails to parse
                    let args = tokens
                        .map(|tok| arg::Arg::from_str(tok))
                        .collect::<Result<Vec<_>, _>>()?;

                    Self::from_args(name, args.as_slice())
                }
            }

            /// Parser utilities
            /// Hashmap for Name -> to fromArg impl and Opcode -> to fromHex impl
            struct DOpFromArg{
                asm: HashMap<String, AsmCallback>,
                hex: HashMap<u8, HexCallback>,
            }

            lazy_static! {
                static ref DOP_LUT: DOpFromArg = {
                    let mut dop_from_arg = DOpFromArg{
                        asm: HashMap::new(),
                        hex: HashMap::new(),
                    };

                    $(
                        dop_from_arg.asm.insert(stringify!([< $asm:upper >]).to_string(), [<DOp $asm:camel >]::from_args);
                        dop_from_arg.hex.insert(u8::from($opcode), [<DOp $asm:camel >]::from_hex);
                    )*
                    dop_from_arg
                };
            }
        }
    };
}
|
||||
168
backends/tfhe-hpu-backend/src/asm/dop/field.rs
Normal file
168
backends/tfhe-hpu-backend/src/asm/dop/field.rs
Normal file
@@ -0,0 +1,168 @@
|
||||
//! List of DOp field
|
||||
//! Mainly thin wrappers over basic types to enforce correct use of asm fields
|
||||
|
||||
// Retrieved CtId definition
|
||||
// This definition is on the boundary between IOp and DOp and is thus defined at the top level.
|
||||
use super::opcode::Opcode;
|
||||
use crate::asm::CtId;
|
||||
|
||||
/// Register argument
|
||||
/// Direct mapping of value to register Id
|
||||
/// 7bits wide -> 128 registers
|
||||
#[derive(
|
||||
Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, Default,
|
||||
)]
|
||||
pub struct RegId(pub u8);
|
||||
|
||||
impl std::ops::Add<usize> for RegId {
|
||||
type Output = RegId;
|
||||
fn add(self, rhs: usize) -> Self::Output {
|
||||
RegId(self.0 + (rhs as u8))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RegId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "R{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// MulFactor argument
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
|
||||
pub struct MulFactor(pub u8);
|
||||
|
||||
impl std::fmt::Display for MulFactor {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "R{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory arguments
|
||||
/// Have multiple mode for proper support of template addressing
|
||||
/// Template enable runtime replacement of MemId with associated Top-level arguments
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
|
||||
pub enum MemId {
|
||||
Addr(CtId),
|
||||
Heap { bid: u16 },
|
||||
Src { tid: u8, bid: u8 },
|
||||
Dst { tid: u8, bid: u8 },
|
||||
}
|
||||
|
||||
impl MemId {
|
||||
pub fn new_heap(bid: u16) -> Self {
|
||||
Self::Heap { bid }
|
||||
}
|
||||
pub fn new_dst(tid: u8, bid: u8) -> Self {
|
||||
Self::Dst { tid, bid }
|
||||
}
|
||||
pub fn new_src(tid: u8, bid: u8) -> Self {
|
||||
Self::Src { tid, bid }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for MemId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
MemId::Addr(addr) => write!(f, "@0x{:x}", addr.0),
|
||||
MemId::Heap { bid } => write!(f, "TH.{bid}"),
|
||||
MemId::Src { tid, bid } => write!(f, "TS[{tid}].{bid}"),
|
||||
MemId::Dst { tid, bid } => write!(f, "TD[{tid}].{bid}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory arguments
|
||||
/// Have multiple mode for proper support of template addressing
|
||||
/// Template enable runtime replacement of MemId with associated Top-level arguments
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum ImmId {
|
||||
Cst(u16),
|
||||
Var { tid: u8, bid: u8 },
|
||||
}
|
||||
|
||||
impl ImmId {
|
||||
/// Create new immediat template
|
||||
pub fn new_var(tid: u8, bid: u8) -> Self {
|
||||
Self::Var { tid, bid }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ImmId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
ImmId::Cst(val) => write!(f, "{val}"),
|
||||
ImmId::Var { tid, bid } => write!(f, "TI[{tid}].{bid}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pbs argument
|
||||
/// Direct mapping to PBS Gid
|
||||
/// 12bits wide -> 4096 lut entries
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PbsGid(pub u16);
|
||||
|
||||
impl std::fmt::Display for PbsGid {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "Pbs{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Sync argument
|
||||
/// Currently unused
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct SyncId(pub u32);
|
||||
|
||||
impl std::fmt::Display for SyncId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// PeArith instructions
|
||||
/// Arithmetic operations that use one destination register and two source registers
|
||||
/// Also have an extra mul_factor field for MAC insn
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PeArithInsn {
|
||||
/// Destination register
pub dst_rid: RegId,
|
||||
/// First source register
pub src0_rid: RegId,
|
||||
/// Second source register
pub src1_rid: RegId,
|
||||
/// Multiplication factor (extra field for MAC insn)
pub mul_factor: MulFactor,
|
||||
/// Opcode of the instruction
pub opcode: Opcode,
|
||||
}
|
||||
|
||||
/// PeArithMsg instructions
|
||||
/// Arithmetic operations that use one destination register, one source register and an
|
||||
/// immediate value
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PeArithMsgInsn {
|
||||
/// Destination register
pub dst_rid: RegId,
|
||||
/// Source register
pub src_rid: RegId,
|
||||
/// Immediate operand: either a constant or an immediate template (see `ImmId`)
pub msg_cst: ImmId,
|
||||
/// Opcode of the instruction
pub opcode: Opcode,
|
||||
}
|
||||
|
||||
/// PeMem instructions
|
||||
/// LD/ST operations with one register and one memory slot
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PeMemInsn {
|
||||
/// Register involved in the transfer
pub rid: RegId,
|
||||
/// Memory slot involved in the transfer (explicit address, heap or template, see `MemId`)
pub slot: MemId,
|
||||
/// Opcode of the instruction
pub opcode: Opcode,
|
||||
}
|
||||
|
||||
/// PePbs instructions
/// Use one destination register, one source register and a Pbs Gid
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PePbsInsn {
|
||||
/// Destination register
pub dst_rid: RegId,
|
||||
/// Source register
pub src_rid: RegId,
|
||||
/// Gid of the Pbs to apply
pub gid: PbsGid,
|
||||
/// Opcode of the instruction
pub opcode: Opcode,
|
||||
}
|
||||
|
||||
/// PeSync instructions
/// Only carry a sync Id alongside the opcode
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PeSyncInsn {
|
||||
/// Sync Id (documented as currently unused on `SyncId`)
pub sid: SyncId,
|
||||
/// Opcode of the instruction
pub opcode: Opcode,
|
||||
}
|
||||
245
backends/tfhe-hpu-backend/src/asm/dop/fmt.rs
Normal file
245
backends/tfhe-hpu-backend/src/asm/dop/fmt.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
//!
|
||||
//! Define binary format encoding of instructions
|
||||
//! Rely on `bitfield_struct` crate to define bit-accurate insn format and enable serde to
|
||||
//! byte-stream
|
||||
//!
|
||||
//! Provide conversion implementation between raw bitfield and DOp types
|
||||
use bitfield_struct::bitfield;
|
||||
|
||||
use super::*;
|
||||
|
||||
// List of DOp formats with their associated encoding
|
||||
// NB: typedef couldn't be used in bitfield_struct macro. Thus macro rely on u32 instead of
|
||||
// DOpRepr...
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
/// Raw type used for encoding
|
||||
pub type DOpRepr = u32;
|
||||
|
||||
/// Conversion of an instruction into its raw binary encoding
#[enum_dispatch]
|
||||
pub trait ToHex {
|
||||
/// Return the instruction encoded as a raw `DOpRepr` word
fn to_hex(&self) -> DOpRepr;
|
||||
}
|
||||
|
||||
/// DOp raw encoding used for Opcode extraction
/// Only the opcode field is exposed, the remaining 26 bits are format specific
|
||||
#[bitfield(u32)]
|
||||
pub struct DOpRawHex {
|
||||
// Format specific payload, opaque at this level
#[bits(26)]
|
||||
_reserved: u32,
|
||||
// Opcode field; same width and position as the `opcode` field of every Pe*Hex format
#[bits(6)]
|
||||
pub opcode: u8,
|
||||
}
|
||||
|
||||
/// PeArith instructions
|
||||
/// Arithmetic operation that use one destination register and two sources register
|
||||
/// Have also an extra mul_factor field for MAC insn
|
||||
#[bitfield(u32)]
|
||||
pub struct PeArithHex {
|
||||
#[bits(7)]
|
||||
dst_rid: u8,
|
||||
#[bits(7)]
|
||||
src0_rid: u8,
|
||||
#[bits(7)]
|
||||
src1_rid: u8,
|
||||
#[bits(5)]
|
||||
mul_factor: u8,
|
||||
#[bits(6)]
|
||||
opcode: u8,
|
||||
}
|
||||
|
||||
impl From<&PeArithInsn> for PeArithHex {
|
||||
fn from(value: &PeArithInsn) -> Self {
|
||||
Self::new()
|
||||
.with_dst_rid(value.dst_rid.0)
|
||||
.with_src0_rid(value.src0_rid.0)
|
||||
.with_src1_rid(value.src1_rid.0)
|
||||
.with_mul_factor(value.mul_factor.0)
|
||||
.with_opcode(value.opcode.into())
|
||||
}
|
||||
}
|
||||
impl From<&PeArithHex> for PeArithInsn {
|
||||
fn from(value: &PeArithHex) -> Self {
|
||||
Self {
|
||||
dst_rid: RegId(value.dst_rid()),
|
||||
src0_rid: RegId(value.src0_rid()),
|
||||
src1_rid: RegId(value.src1_rid()),
|
||||
mul_factor: MulFactor(value.mul_factor()),
|
||||
opcode: Opcode::from(value.opcode()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// PeaMsg instructions
|
||||
/// Arithmetic operation that use one destination register, one source register and an immediat
|
||||
/// value
|
||||
#[bitfield(u32)]
|
||||
pub struct PeArithMsgHex {
|
||||
#[bits(7)]
|
||||
dst_rid: u8,
|
||||
#[bits(7)]
|
||||
src_rid: u8,
|
||||
#[bits(1)]
|
||||
msg_mode: bool,
|
||||
#[bits(11)]
|
||||
msg_cst: u16,
|
||||
#[bits(6)]
|
||||
opcode: u8,
|
||||
}
|
||||
// Define encoding for msg_mode
|
||||
const IMM_CST: bool = false;
|
||||
const IMM_VAR: bool = true;
|
||||
|
||||
impl From<&PeArithMsgInsn> for PeArithMsgHex {
|
||||
fn from(value: &PeArithMsgInsn) -> Self {
|
||||
let (mode, cst) = match value.msg_cst {
|
||||
ImmId::Cst(cst) => (IMM_CST, cst),
|
||||
ImmId::Var { tid, bid } => (IMM_VAR, (((tid as u16) << 8) + bid as u16)),
|
||||
};
|
||||
|
||||
Self::new()
|
||||
.with_dst_rid(value.dst_rid.0)
|
||||
.with_src_rid(value.src_rid.0)
|
||||
.with_msg_mode(mode)
|
||||
.with_msg_cst(cst)
|
||||
.with_opcode(value.opcode.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&PeArithMsgHex> for PeArithMsgInsn {
|
||||
fn from(value: &PeArithMsgHex) -> Self {
|
||||
let msg_cst = match value.msg_mode() {
|
||||
IMM_CST => ImmId::Cst(value.msg_cst()),
|
||||
IMM_VAR => ImmId::new_var((value.msg_cst() >> 8) as u8, (value.msg_cst() & 0xff) as u8),
|
||||
};
|
||||
|
||||
Self {
|
||||
dst_rid: RegId(value.dst_rid()),
|
||||
src_rid: RegId(value.src_rid()),
|
||||
msg_cst,
|
||||
opcode: Opcode::from(value.opcode()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// PeMem instructions
|
||||
/// LD/St operation with one register and one memory slot
|
||||
#[bitfield(u32)]
|
||||
pub struct PeMemHex {
|
||||
#[bits(7)]
|
||||
rid: u8,
|
||||
#[bits(1)]
|
||||
_pad: u8,
|
||||
#[bits(2)]
|
||||
mode: u8,
|
||||
#[bits(16)]
|
||||
slot: u16,
|
||||
#[bits(6)]
|
||||
opcode: u8,
|
||||
}
|
||||
|
||||
// Define encoding for mem_mode
|
||||
const MEM_ADDR: u8 = 0x0;
|
||||
const MEM_HEAP: u8 = 0x1;
|
||||
const MEM_SRC: u8 = 0x2;
|
||||
const MEM_DST: u8 = 0x3;
|
||||
|
||||
impl From<&PeMemInsn> for PeMemHex {
|
||||
fn from(value: &PeMemInsn) -> Self {
|
||||
let (mode, slot) = match value.slot {
|
||||
MemId::Addr(ct_id) => (MEM_ADDR, ct_id.0),
|
||||
MemId::Heap { bid } => (MEM_HEAP, bid),
|
||||
MemId::Src { tid, bid } => (MEM_SRC, ((tid as u16) << 8) + bid as u16),
|
||||
MemId::Dst { tid, bid } => (MEM_DST, ((tid as u16) << 8) + bid as u16),
|
||||
};
|
||||
|
||||
Self::new()
|
||||
.with_rid(value.rid.0)
|
||||
.with_mode(mode)
|
||||
.with_slot(slot)
|
||||
.with_opcode(value.opcode.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&PeMemHex> for PeMemInsn {
|
||||
fn from(value: &PeMemHex) -> Self {
|
||||
let slot = if MEM_ADDR == value.mode() {
|
||||
MemId::Addr(crate::asm::CtId(value.slot()))
|
||||
} else if MEM_HEAP == value.mode() {
|
||||
MemId::Heap { bid: value.slot() }
|
||||
} else if MEM_SRC == value.mode() {
|
||||
MemId::Src {
|
||||
tid: (value.slot() >> 8) as u8,
|
||||
bid: (value.slot() & 0xff) as u8,
|
||||
}
|
||||
} else if MEM_DST == value.mode() {
|
||||
MemId::Dst {
|
||||
tid: (value.slot() >> 8) as u8,
|
||||
bid: (value.slot() & 0xff) as u8,
|
||||
}
|
||||
} else {
|
||||
panic!("Unsupported memory mode")
|
||||
};
|
||||
|
||||
Self {
|
||||
rid: RegId(value.rid()),
|
||||
slot,
|
||||
opcode: Opcode::from(value.opcode()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// PePbs instructions
|
||||
#[bitfield(u32)]
|
||||
pub struct PePbsHex {
|
||||
#[bits(7)]
|
||||
dst_rid: u8,
|
||||
#[bits(7)]
|
||||
src_rid: u8,
|
||||
#[bits(12)]
|
||||
gid: u16,
|
||||
#[bits(6)]
|
||||
opcode: u8,
|
||||
}
|
||||
|
||||
impl From<&PePbsInsn> for PePbsHex {
|
||||
fn from(value: &PePbsInsn) -> Self {
|
||||
Self::new()
|
||||
.with_dst_rid(value.dst_rid.0)
|
||||
.with_src_rid(value.src_rid.0)
|
||||
.with_gid(value.gid.0)
|
||||
.with_opcode(value.opcode.into())
|
||||
}
|
||||
}
|
||||
impl From<&PePbsHex> for PePbsInsn {
|
||||
fn from(value: &PePbsHex) -> Self {
|
||||
Self {
|
||||
dst_rid: RegId(value.dst_rid()),
|
||||
src_rid: RegId(value.src_rid()),
|
||||
gid: PbsGid(value.gid()),
|
||||
opcode: Opcode::from(value.opcode()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// PeSync instructions
|
||||
#[bitfield(u32)]
|
||||
pub struct PeSyncHex {
|
||||
#[bits(26)]
|
||||
sid: u32,
|
||||
#[bits(6)]
|
||||
opcode: u8,
|
||||
}
|
||||
impl From<&PeSyncInsn> for PeSyncHex {
|
||||
fn from(value: &PeSyncInsn) -> Self {
|
||||
Self::new()
|
||||
.with_sid(value.sid.0)
|
||||
.with_opcode(value.opcode.into())
|
||||
}
|
||||
}
|
||||
impl From<&PeSyncHex> for PeSyncInsn {
|
||||
fn from(value: &PeSyncHex) -> Self {
|
||||
Self {
|
||||
sid: SyncId(value.sid()),
|
||||
opcode: Opcode::from(value.opcode()),
|
||||
}
|
||||
}
|
||||
}
|
||||
531
backends/tfhe-hpu-backend/src/asm/dop/mod.rs
Normal file
531
backends/tfhe-hpu-backend/src/asm/dop/mod.rs
Normal file
@@ -0,0 +1,531 @@
|
||||
pub mod arg;
|
||||
mod dop_macro;
|
||||
pub mod field;
|
||||
pub mod fmt;
|
||||
mod opcode;
|
||||
pub mod pbs_macro;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{dop, impl_dop, impl_dop_parser};
|
||||
pub use arg::{FromAsm, IsFlush, ParsingError, ToAsm, ToFlush};
|
||||
pub use field::{
|
||||
ImmId, MemId, MulFactor, PbsGid, PeArithInsn, PeArithMsgInsn, PeMemInsn, PePbsInsn, PeSyncInsn,
|
||||
RegId, SyncId,
|
||||
};
|
||||
pub use fmt::{
|
||||
DOpRawHex, DOpRepr, PeArithHex, PeArithMsgHex, PeMemHex, PePbsHex, PeSyncHex, ToHex,
|
||||
};
|
||||
pub use opcode::{DOpType, Opcode};
|
||||
|
||||
// Table of all DOp known by the backend.
// Each entry is `[asm name, opcode, instruction format {variant}?, flush suffix?]`.
// The flush suffix, when present, names the entry used by `ToFlush` (e.g. PBS -> PBS_F).
dop!(
    // Arith operation
    ["ADD", opcode::Opcode::ADD(), PeArithInsn],
    ["SUB", opcode::Opcode::SUB(), PeArithInsn],
    ["MAC", opcode::Opcode::MAC(), PeArithInsn{mul_factor}],

    // ArithMsg operation
    ["ADDS", opcode::Opcode::ADDS(), PeArithMsgInsn],
    ["SUBS", opcode::Opcode::SUBS(), PeArithMsgInsn],
    ["SSUB", opcode::Opcode::SSUB(), PeArithMsgInsn],
    ["MULS", opcode::Opcode::MULS(), PeArithMsgInsn],

    // Ld/st operation
    ["LD", opcode::Opcode::LD(), PeMemInsn{ld}],
    ["ST", opcode::Opcode::ST(), PeMemInsn{st}],

    // Pbs operation
    ["PBS", opcode::Opcode::PBS(1), PePbsInsn, "_F"],
    ["PBS_ML2", opcode::Opcode::PBS(2), PePbsInsn, "_F"],
    ["PBS_ML4", opcode::Opcode::PBS(4), PePbsInsn, "_F"],
    ["PBS_ML8", opcode::Opcode::PBS(8), PePbsInsn, "_F"],

    // Pbs flush operation
    ["PBS_F", opcode::Opcode::PBS_F(1), PePbsInsn],
    ["PBS_ML2_F", opcode::Opcode::PBS_F(2), PePbsInsn],
    ["PBS_ML4_F", opcode::Opcode::PBS_F(4), PePbsInsn],
    ["PBS_ML8_F", opcode::Opcode::PBS_F(8), PePbsInsn],

    // Sync operation
    ["SYNC", opcode::Opcode::SYNC(), PeSyncInsn],
);
|
||||
|
||||
/// Bit layout of a digit: `msg_w` message bits in the LSBs, `carry_w` carry bits just above
/// and one padding bit on top of them.
#[derive(Debug, Clone, Copy)]
pub struct DigitParameters {
    pub msg_w: usize,
    pub carry_w: usize,
}

impl DigitParameters {
    /// Mask selecting the message bits only
    pub fn msg_mask(&self) -> usize {
        let msg_range: usize = 1 << self.msg_w;
        msg_range - 1
    }

    /// Mask selecting the carry bits only
    pub fn carry_mask(&self) -> usize {
        let carry_range: usize = 1 << (self.carry_w);
        (carry_range - 1) << self.msg_w
    }

    /// Mask selecting the padding bit only
    pub fn padding_mask(&self) -> usize {
        let padding_pos = self.carry_w + self.msg_w;
        1 << padding_pos
    }

    /// Mask selecting carry + msg bits
    pub fn data_mask(&self) -> usize {
        let (carry, msg) = (self.carry_mask(), self.msg_mask());
        carry | msg
    }

    /// Mask selecting padding + carry + msg bits
    pub fn raw_mask(&self) -> usize {
        let (padding, data) = (self.padding_mask(), self.data_mask());
        padding | data
    }

    /// Message range (used for neg operation)
    pub fn msg_range(&self) -> usize {
        1 << self.msg_w
    }

    /// Compute available linear operation based on carry_w/msg_w
    // TODO: Find a proper way to have nu < carry_w (i.e ManyLutPbs case)
    pub fn nu(&self) -> usize {
        let carry = self.carry_mask();
        let msg = self.msg_mask();
        (carry + msg) / msg
    }

    /// Number of data bits, i.e. message plus carry width
    pub fn total_width(&self) -> usize {
        let Self { msg_w, carry_w } = *self;
        msg_w + carry_w
    }
}
|
||||
|
||||
/// Base trait to depict a Pbs function
|
||||
/// Provides a set of methods to reason about pbs
|
||||
#[enum_dispatch]
|
||||
pub trait PbsLut {
|
||||
/// Name of the Pbs
fn name(&self) -> &'static str;
|
||||
/// Gid of the Pbs
fn gid(&self) -> PbsGid;
|
||||
// NOTE(review): presumably the number of functions held by the lut -- TODO confirm
fn lut_nb(&self) -> u8;
|
||||
// NOTE(review): presumably log2 of lut_nb -- TODO confirm
fn lut_lg(&self) -> u8;
|
||||
/// Evaluate the function at position `pos` on `val`
fn fn_at(&self, pos: usize, params: &DigitParameters, val: usize) -> usize;
|
||||
/// Evaluate the output degree of the function at position `pos` for an input degree `deg`
fn deg_at(&self, pos: usize, params: &DigitParameters, deg: usize) -> usize;
|
||||
// Default implementation
// All-ones mask with the `lut_lg` least-significant bits cleared
|
||||
fn lut_msk(&self) -> usize {
|
||||
usize::MAX << self.lut_lg()
|
||||
}
|
||||
}
|
||||
|
||||
use crate::{impl_pbs, pbs};
|
||||
use enum_dispatch::enum_dispatch;
|
||||
use pbs_macro::{CMP_EQUAL, CMP_INFERIOR, CMP_SUPERIOR};
|
||||
|
||||
pbs!(
|
||||
["None" => 0 [
|
||||
@0 =>{
|
||||
|_params: &DigitParameters, val | val;
|
||||
|_params: &DigitParameters, deg| deg;
|
||||
}
|
||||
]],
|
||||
["MsgOnly" => 1 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | val & params.msg_mask();
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
["CarryOnly" => 2 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | val & params.carry_mask();
|
||||
|params: &DigitParameters, _deg| params.carry_mask();
|
||||
}
|
||||
]],
|
||||
["CarryInMsg" => 3 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | (val & params.carry_mask()) >> params.msg_w;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]]
|
||||
["MultCarryMsg" => 4 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | (((val & params.carry_mask()) >> params.msg_w) * (val & params.msg_mask())) & params.data_mask();
|
||||
|params: &DigitParameters, _deg| params.data_mask();
|
||||
}
|
||||
]],
|
||||
["MultCarryMsgLsb" => 5 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | (((val & params.carry_mask()) >> params.msg_w) * (val & params.msg_mask())) & params.msg_mask();
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
]],
|
||||
["MultCarryMsgMsb" => 6 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | ((((val & params.carry_mask()) >> params.msg_w) * (val & params.msg_mask())) >> params.msg_w) & params.msg_mask();
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
["BwAnd" => 7 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | (((val & params.carry_mask()) >> params.msg_w) & (val & params.msg_mask())) & params.msg_mask();
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
["BwOr" => 8 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | (((val & params.carry_mask()) >> params.msg_w) | (val & params.msg_mask())) & params.msg_mask();
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
["BwXor" => 9 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | (((val & params.carry_mask()) >> params.msg_w) ^ (val & params.msg_mask())) & params.msg_mask();
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
|
||||
["CmpSign" => 10 [
|
||||
@0 =>{
|
||||
|_params: &DigitParameters, val | {
|
||||
// Signed comparison with 0. Based on behavior of negacyclic function.
|
||||
// Example for Padding| 4bit digits (i.e 2msg2Carry)
|
||||
// 1|xxxx -> SignLut -> -1 -> 0|1111
|
||||
// x|0000 -> SignLut -> 0 -> 0|0000
|
||||
// 0|xxxx -> SignLut -> 1 -> 0|0001
|
||||
if val != 0 {1} else {0}
|
||||
};
|
||||
// WARN: in practice return value with padding that could encode -1, 0, 1
|
||||
// But should always be follow by an add to reach back range 0, 1, 2
|
||||
// To ease degree handling considered an output degree of 1 to obtain
|
||||
// degree 2 after add
|
||||
// Not a perfect solution but the easiest to prevent degree error
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpReduce" => 11 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
// Carry contain MSB cmp result, msg LSB cmp result
|
||||
// Reduction is made from lsb to msb as follow
|
||||
// MSB | LSB | Out
|
||||
// Inferior | x | Inferior
|
||||
// Equal | x | x
|
||||
// Superior | x | Superior
|
||||
let carry_field = (val & params.carry_mask()) >> params.msg_w;
|
||||
let msg_field = val & params.msg_mask();
|
||||
|
||||
match (carry_field, msg_field) {
|
||||
(CMP_EQUAL, lsb_cmp) => lsb_cmp,
|
||||
_ => carry_field
|
||||
}
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 2;
|
||||
}
|
||||
]]
|
||||
|
||||
["CmpGt" => 12 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | match val & params.msg_mask() {
|
||||
CMP_SUPERIOR => 1,
|
||||
_ => 0,
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpGte" => 13 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | match val & params.msg_mask() {
|
||||
CMP_SUPERIOR | CMP_EQUAL => 1,
|
||||
_ => 0,
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
// Could be merge with Gt/Gte
|
||||
["CmpLt" => 14 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | match val & params.msg_mask() {
|
||||
CMP_INFERIOR => 1,
|
||||
_ => 0,
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpLte" => 15 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | match val & params.msg_mask() {
|
||||
CMP_INFERIOR | CMP_EQUAL => 1,
|
||||
_ => 0,
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpEq" => 16 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | match val & params.msg_mask() {
|
||||
CMP_EQUAL => 1,
|
||||
_ => 0,
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpNeq" => 17 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | match val & params.msg_mask() {
|
||||
CMP_EQUAL => 0,
|
||||
_ => 1,
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["ManyGenProp" => 18 [ // Turns carry save into a generate/propagate pair and message with manyLUT
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val| { val & params.msg_mask()};
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@1 =>{
|
||||
|params: &DigitParameters, val| {
|
||||
((val & params.carry_mask()) >> (params.msg_w)) << 1| // Generate
|
||||
(((val & params.msg_mask()) == params.msg_mask()) as usize) // Propagate
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 3;
|
||||
}
|
||||
]],
|
||||
["ReduceCarry2" => 19 [ // Reduces a carry propagation add to two bits from an
|
||||
// input in which the carry is in the second bit.
|
||||
@0 =>{
|
||||
|_params: &DigitParameters, val | {
|
||||
let carry = val >> 2;
|
||||
let prop = (val & 3 == 3) as usize;
|
||||
(carry << 1) | prop
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 3;
|
||||
}
|
||||
]],
|
||||
["ReduceCarry3" => 20 [ // Reduces a carry propagation add to two bits from an
|
||||
// input in which the carry is in the third bit.
|
||||
@0 =>{
|
||||
|_params: &DigitParameters, val | {
|
||||
let carry = val >> 3;
|
||||
let prop = (val & 7 == 7) as usize;
|
||||
(carry << 1) | prop
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 3;
|
||||
}
|
||||
]],
|
||||
["ReduceCarryPad" => 21 [ // Reduces a carry propagation add to two bits from an
|
||||
// input in which the carry is in the padding bit.
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
if val == params.data_mask() {
|
||||
0
|
||||
} else {
|
||||
params.raw_mask()
|
||||
}
|
||||
};
|
||||
|params: &DigitParameters, _deg| params.raw_mask();
|
||||
}
|
||||
]],
|
||||
["GenPropAdd" => 22 [ // Adds a generate/propagate pair with a message modulus message
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let lhs = val & params.msg_mask();
|
||||
let rhs = (val & params.carry_mask()) >> params.msg_w;
|
||||
let rhs_gen = rhs >> 1;
|
||||
(lhs + rhs_gen) & params.msg_mask()
|
||||
};
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
|
||||
["IfTrueZeroed" => 23 [ // Ct must contain CondCt in Carry and ValueCt in Msg. If condition it's *TRUE*, value ct is forced to 0
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let value = val & params.msg_mask();
|
||||
let cond = (val & params.carry_mask()) >> params.msg_w;
|
||||
if cond != 0 {0} else {value}
|
||||
};
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
["IfFalseZeroed" => 24 [ // Ct must contain CondCt in Carry and ValueCt in Msg. If condition it's *FALSE*, value ct is forced to 0
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let value = val & params.msg_mask();
|
||||
let cond = (val & params.carry_mask()) >> params.msg_w;
|
||||
if cond != 0 {value} else {0}
|
||||
};
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
["Ripple2GenProp" => 25 [ // Converts from Ripple carry to GenProp
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
(val & params.msg_mask()) * 2
|
||||
};
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
}
|
||||
]],
|
||||
|
||||
// The TestMany* Pbs below are defined for Test only
|
||||
["TestMany2" => 128 [
|
||||
@0 =>{
|
||||
|_params: &DigitParameters, val | val;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@1 =>{
|
||||
|_params: &DigitParameters, val | val +1;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
]],
|
||||
["TestMany4" => 129 [
|
||||
@0 =>{
|
||||
|_params: &DigitParameters, val | val;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@1 =>{
|
||||
|_params: &DigitParameters, val | val +1;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@2 =>{
|
||||
|_params: &DigitParameters, val | val +2;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@3 =>{
|
||||
|_params: &DigitParameters, val | val +3;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
]],
|
||||
["TestMany8" => 130 [
|
||||
@0 =>{
|
||||
|_params: &DigitParameters, val | val;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@1 =>{
|
||||
|_params: &DigitParameters, val | val +1;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@2 =>{
|
||||
|_params: &DigitParameters, val | val +2;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@3 =>{
|
||||
|_params: &DigitParameters, val | val +3;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@4 =>{
|
||||
|_params: &DigitParameters, val | val +4;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@5 =>{
|
||||
|_params: &DigitParameters, val | val +5;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@6 =>{
|
||||
|_params: &DigitParameters, val | val +6;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@7 =>{
|
||||
|_params: &DigitParameters, val | val +7;
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
]],
|
||||
["ManyCarryMsg" => 26 [ // Turns carry save into carry and message with manyLUT
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val| { val & params.msg_mask()};
|
||||
|params: &DigitParameters, _deg| params.msg_mask();
|
||||
},
|
||||
@1 =>{
|
||||
|params: &DigitParameters, val| { val >> params.msg_w };
|
||||
|params: &DigitParameters, _deg| ((1 << (params.carry_w - 1)) - 1);
|
||||
}
|
||||
]],
|
||||
["CmpGtMrg" => 27 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let carry_field = (val & params.carry_mask()) >> params.msg_w;
|
||||
let msg_field = val & params.msg_mask();
|
||||
|
||||
match (carry_field, msg_field) {
|
||||
(CMP_SUPERIOR, _) |
|
||||
(CMP_EQUAL, CMP_SUPERIOR) => 1,
|
||||
_ => 0,
|
||||
}
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpGteMrg" => 28 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let carry_field = (val & params.carry_mask()) >> params.msg_w;
|
||||
let msg_field = val & params.msg_mask();
|
||||
|
||||
match (carry_field, msg_field) {
|
||||
(CMP_SUPERIOR, _) |
|
||||
(CMP_EQUAL, CMP_SUPERIOR) |
|
||||
(CMP_EQUAL, CMP_EQUAL) => 1,
|
||||
_ => 0,
|
||||
}
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpLtMrg" => 29 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let carry_field = (val & params.carry_mask()) >> params.msg_w;
|
||||
let msg_field = val & params.msg_mask();
|
||||
|
||||
match (carry_field, msg_field) {
|
||||
(CMP_INFERIOR, _) |
|
||||
(CMP_EQUAL, CMP_INFERIOR) => 1,
|
||||
_ => 0,
|
||||
}
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpLteMrg" => 30 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let carry_field = (val & params.carry_mask()) >> params.msg_w;
|
||||
let msg_field = val & params.msg_mask();
|
||||
|
||||
match (carry_field, msg_field) {
|
||||
(CMP_INFERIOR, _) |
|
||||
(CMP_EQUAL, CMP_INFERIOR) |
|
||||
(CMP_EQUAL, CMP_EQUAL) => 1,
|
||||
_ => 0,
|
||||
}
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpEqMrg" => 31 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let carry_field = (val & params.carry_mask()) >> params.msg_w;
|
||||
let msg_field = val & params.msg_mask();
|
||||
|
||||
match (carry_field, msg_field) {
|
||||
(CMP_EQUAL, CMP_EQUAL) => 1,
|
||||
_ => 0,
|
||||
}
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
["CmpNeqMrg" => 32 [
|
||||
@0 =>{
|
||||
|params: &DigitParameters, val | {
|
||||
let carry_field = (val & params.carry_mask()) >> params.msg_w;
|
||||
let msg_field = val & params.msg_mask();
|
||||
|
||||
match (carry_field, msg_field) {
|
||||
(CMP_EQUAL, CMP_EQUAL) => 0,
|
||||
_ => 1,
|
||||
}
|
||||
};
|
||||
|_params: &DigitParameters, _deg| 1;
|
||||
}
|
||||
]],
|
||||
);
|
||||
|
||||
/// Ceiling of the base-2 logarithm of `value`.
///
/// # Panics
/// Panics when `value` is zero, as the underlying `ilog2` does.
pub(crate) fn ceil_ilog2(value: &u8) -> u8 {
    let floor_log = value.ilog2();
    // Exact powers of two need no rounding up
    let round_up = if value.is_power_of_two() { 0 } else { 1 };
    (floor_log + round_up) as u8
}
|
||||
166
backends/tfhe-hpu-backend/src/asm/dop/opcode.rs
Normal file
166
backends/tfhe-hpu-backend/src/asm/dop/opcode.rs
Normal file
@@ -0,0 +1,166 @@
|
||||
//!
|
||||
//! Define hex encoding for a subset of known DOp
|
||||
//! DOp opcodes are made of two sections: {Type, subtype}
|
||||
|
||||
/// Opcode structure
|
||||
/// Gather DOp type and subtype
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
|
||||
pub struct Opcode {
|
||||
optype: DOpType,
|
||||
subtype: u8,
|
||||
}
|
||||
|
||||
/// Define Instruction type as C-like enumeration
|
||||
/// Types are encoded with 2bits
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
|
||||
pub enum DOpType {
|
||||
ARITH = 0b00,
|
||||
SYNC = 0b01,
|
||||
MEM = 0b10,
|
||||
PBS = 0b11,
|
||||
}
|
||||
|
||||
/// Define raw type conversion
|
||||
/// Opcode is on 6bits
|
||||
impl From<Opcode> for u8 {
|
||||
fn from(value: Opcode) -> Self {
|
||||
(((value.optype as u8) & 0x3) << 4) + value.subtype
|
||||
}
|
||||
}
|
||||
impl From<u8> for Opcode {
|
||||
fn from(value: u8) -> Self {
|
||||
let subtype = value & 0xf;
|
||||
let optype_raw = (value >> 4) & 0x3;
|
||||
let optype = match optype_raw {
|
||||
x if x == DOpType::ARITH as u8 => DOpType::ARITH,
|
||||
x if x == DOpType::SYNC as u8 => DOpType::SYNC,
|
||||
x if x == DOpType::MEM as u8 => DOpType::MEM,
|
||||
x if x == DOpType::PBS as u8 => DOpType::PBS,
|
||||
_ => panic!("Invalid DOpType"),
|
||||
};
|
||||
|
||||
Self { optype, subtype }
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement helper function to create Arith DOp
|
||||
impl Opcode {
|
||||
#[allow(non_snake_case)]
|
||||
pub fn ADD() -> Self {
|
||||
Self {
|
||||
optype: DOpType::ARITH,
|
||||
subtype: 0b0001,
|
||||
}
|
||||
}
|
||||
#[allow(non_snake_case)]
|
||||
pub fn SUB() -> Self {
|
||||
Self {
|
||||
optype: DOpType::ARITH,
|
||||
subtype: 0b0010,
|
||||
}
|
||||
}
|
||||
#[allow(non_snake_case)]
|
||||
pub fn MAC() -> Self {
|
||||
Self {
|
||||
optype: DOpType::ARITH,
|
||||
subtype: 0b0101,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement helper function to create ArithMsg DOp
|
||||
impl Opcode {
|
||||
#[allow(non_snake_case)]
|
||||
pub fn ADDS() -> Self {
|
||||
Self {
|
||||
optype: DOpType::ARITH,
|
||||
subtype: 0b1001,
|
||||
}
|
||||
}
|
||||
#[allow(non_snake_case)]
|
||||
pub fn SUBS() -> Self {
|
||||
Self {
|
||||
optype: DOpType::ARITH,
|
||||
subtype: 0b1010,
|
||||
}
|
||||
}
|
||||
#[allow(non_snake_case)]
|
||||
pub fn SSUB() -> Self {
|
||||
Self {
|
||||
optype: DOpType::ARITH,
|
||||
subtype: 0b1011,
|
||||
}
|
||||
}
|
||||
#[allow(non_snake_case)]
|
||||
pub fn MULS() -> Self {
|
||||
Self {
|
||||
optype: DOpType::ARITH,
|
||||
subtype: 0b1100,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement helper function to create Sync DOp
|
||||
impl Opcode {
|
||||
#[allow(non_snake_case)]
|
||||
pub fn SYNC() -> Self {
|
||||
Self {
|
||||
optype: DOpType::SYNC,
|
||||
subtype: 0b0000,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement helper function to create Memory DOp
|
||||
impl Opcode {
|
||||
#[allow(non_snake_case)]
|
||||
pub fn LD() -> Self {
|
||||
Self {
|
||||
optype: DOpType::MEM,
|
||||
subtype: 0b0000,
|
||||
}
|
||||
}
|
||||
#[allow(non_snake_case)]
|
||||
pub fn ST() -> Self {
|
||||
Self {
|
||||
optype: DOpType::MEM,
|
||||
subtype: 0b0001,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement helper function to create Memory DOp
|
||||
pub const PBS_HAS_FLUSH: u8 = 0b1000;
|
||||
impl Opcode {
|
||||
#[allow(non_snake_case)]
|
||||
pub fn PBS(lut_nb: u8) -> Self {
|
||||
let lut_lg = super::ceil_ilog2(&lut_nb);
|
||||
let subtype = lut_lg & 0x3;
|
||||
Self {
|
||||
optype: DOpType::PBS,
|
||||
subtype,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn PBS_F(lut_nb: u8) -> Self {
|
||||
let lut_lg = super::ceil_ilog2(&lut_nb);
|
||||
let subtype = PBS_HAS_FLUSH + (lut_lg & 0x3);
|
||||
Self {
|
||||
optype: DOpType::PBS,
|
||||
subtype,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Opcode {
|
||||
pub fn is_flush(&self) -> bool {
|
||||
(self.optype == DOpType::PBS) && (self.subtype & PBS_HAS_FLUSH) != 0
|
||||
}
|
||||
pub fn to_flush(&self) -> Self {
|
||||
Self {
|
||||
subtype: self.subtype | PBS_HAS_FLUSH,
|
||||
..*self
|
||||
}
|
||||
}
|
||||
}
|
||||
152
backends/tfhe-hpu-backend/src/asm/dop/pbs_macro.rs
Normal file
152
backends/tfhe-hpu-backend/src/asm/dop/pbs_macro.rs
Normal file
@@ -0,0 +1,152 @@
|
||||
//! Pbs definition is repetitive
|
||||
//!
|
||||
//! A `macro_rules!` is used to help with Pbs definitions
|
||||
|
||||
/// Comparison digit encoding: left operand is lower
pub const CMP_INFERIOR: usize = 0;
/// Comparison digit encoding: operands are equal
pub const CMP_EQUAL: usize = 1;
/// Comparison digit encoding: left operand is greater
pub const CMP_SUPERIOR: usize = 2;
|
||||
|
||||
/// Generate one Pbs lookup-table type.
///
/// Expands `"Name" => gid [ @id => { func; deg }, ... ]` into a `Pbs<Name>` struct and its
/// `PbsLut` implementation. Positions without an explicit `@id` entry behave as identity.
#[macro_export]
macro_rules! impl_pbs {
    (
        $pbs: literal => $gid: literal [
            $(@$id:literal => {
                $func: expr;
                $deg: expr$(;)?
            }$(,)?)+
        ]
    ) => {
        ::paste::paste! {
            #[derive(Debug, Default, PartialEq, Eq, Clone)]
            pub struct [<Pbs $pbs:camel>]();

            impl PbsLut for [<Pbs $pbs:camel>] {
                fn name(&self) -> &'static str {
                    $pbs
                }

                fn gid(&self) -> PbsGid {
                    PbsGid($gid)
                }

                // Number of lut: highest declared position + 1
                fn lut_nb(&self) -> u8 {
                    match [$($id,)*].iter().max() {
                        Some(max) => max + 1,
                        None => 0,
                    }
                }

                fn lut_lg(&self) -> u8 {
                    ceil_ilog2(&self.lut_nb())
                }

                fn fn_at(&self, pos: usize, params: &DigitParameters, val: usize) -> usize {
                    match pos {
                        $(
                            $id => ($func)(params, val),
                        )*
                        // Unspecified -> Default to identity
                        _ => val,
                    }
                }

                fn deg_at(&self, pos: usize, params: &DigitParameters, deg: usize) -> usize {
                    match pos {
                        $(
                            $id => ($deg)(params, deg),
                        )*
                        // Unspecified -> Default to identity
                        _ => deg,
                    }
                }
            }
        }
    };
}
|
||||
|
||||
/// Declare the whole set of Pbs.
///
/// For each `["Name" => gid [ ... ]]` entry this generates the concrete `Pbs<Name>` type (through
/// `impl_pbs!`), then aggregates every type in the `Pbs` enumeration along with its
/// `Display`/`FromStr` implementations and the name/gid lookup tables used for parsing.
#[macro_export]
macro_rules! pbs {
    (
        $([$pbs: literal => $gid: literal [
            $(@$id:literal => {
                $func: expr;
                $deg: expr$(;)?
            }$(,)?)+]
        ] $(,)?)*
    ) => {
        ::paste::paste! {
            $(
                impl_pbs!($pbs => $gid [ $(@$id => {$func; $deg;},)*]);
            )*

            /// Aggregate Pbs concrete type in one enumeration
            #[derive(Debug, Clone, PartialEq, Eq)]
            #[enum_dispatch(PbsLut)]
            pub enum Pbs{
                $([< $pbs:camel >]([< Pbs $pbs:camel >]),)*
            }

            impl std::fmt::Display for Pbs {
                fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                    write!(f, "Pbs{}", self.name())
                }
            }

            impl std::str::FromStr for Pbs {
                type Err = ParsingError;

                fn from_str(name: &str) -> Result<Self, Self::Err> {
                    PBS_LUT
                        .asm
                        .get(name)
                        .cloned()
                        .ok_or_else(|| ParsingError::Unmatch(format!("Pbs{name} unknown")))
                }
            }

            impl Pbs {
                /// Retrieve a Pbs from its hardware gid
                pub fn from_hex(gid: PbsGid) -> Result<Self, ParsingError> {
                    PBS_LUT
                        .hex
                        .get(&gid)
                        .cloned()
                        .ok_or_else(|| ParsingError::Unmatch(format!("Pbs {gid:?} unknown")))
                }

                /// List every declared Pbs (order is unspecified: values of a HashMap)
                pub fn list_all() -> Vec<Self> {
                    PBS_LUT.hex.values().cloned().collect::<Vec<_>>()
                }
            }

            /// Parser utilities
            /// Hashmaps from asm name and from gid to the matching Pbs
            struct PbsFromArg{
                asm: HashMap<String, Pbs>,
                hex: HashMap<PbsGid, Pbs>,
            }

            lazy_static! {
                static ref PBS_LUT: PbsFromArg = {

                    let mut pbs_from_arg = PbsFromArg{
                        asm: HashMap::new(),
                        hex: HashMap::new(),
                    };

                    $(
                        // NB: use the same `:camel` conversion as the struct declaration in
                        // `impl_pbs!`, otherwise the two identifiers could diverge
                        let pbs = Pbs::[< $pbs:camel >]([< Pbs $pbs:camel >]::default());
                        pbs_from_arg.asm.insert(stringify!([< $pbs:camel >]).to_string(), pbs.clone());
                        pbs_from_arg.hex.insert(pbs.gid(), pbs);
                    )*
                    pbs_from_arg
                };
            }
        }
    };
}
|
||||
545
backends/tfhe-hpu-backend/src/asm/iop/arg.rs
Normal file
545
backends/tfhe-hpu-backend/src/asm/iop/arg.rs
Normal file
@@ -0,0 +1,545 @@
|
||||
//!
|
||||
//! Gather IOp argument in a common type
|
||||
//! Provides a FromStr implementation for parsing
|
||||
|
||||
use super::*;
|
||||
use field::{
|
||||
FwMode, IOpHeader, IOpcode, ImmBundle, Immediat, Operand, OperandBlock, OperandBundle,
|
||||
};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
/// Minimum display width used to left-align the opcode name in asm rendering
pub const ASM_OPCODE_WIDTH: usize = 8;
|
||||
|
||||
/// Parsing error
|
||||
#[derive(thiserror::Error, Debug, Clone)]
|
||||
pub enum ParsingError {
|
||||
#[error("Opcode {0} is in in reserved range")]
|
||||
Opcode(u8),
|
||||
#[error("Unknown IOp alias {0}")]
|
||||
Opalias(String),
|
||||
#[error("Unmatch Asm Operation: {0}")]
|
||||
Unmatch(String),
|
||||
#[error("Invalid arguments number: expect {0}, get {1}")]
|
||||
ArgNumber(usize, usize),
|
||||
#[error("Invalid arguments type: expect {0}, get {1}")]
|
||||
ArgType(String, Arg),
|
||||
#[error("Invalid arguments: {0}")]
|
||||
InvalidArg(String),
|
||||
#[error("Empty line")]
|
||||
Empty,
|
||||
}
|
||||
|
||||
// Asm arguments are slightly different from the hex words
|
||||
// Thus we can't directly map ASM args to the fmt structures
|
||||
// Below, we define a set of arguments for parsing purpose
|
||||
|
||||
/// Define fixed inner IOp (opposed as user available IOP)
|
||||
/// Those IOp are generated by the Fw and have a fixed number of arguments
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct IOpFormat {
|
||||
pub name: String,
|
||||
pub opcode: IOpcode,
|
||||
pub proto: IOpProto,
|
||||
}
|
||||
|
||||
/// Opcode asm parsing utility
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct AsmIOpcode {
|
||||
pub(crate) opcode: IOpcode,
|
||||
pub(crate) format: Option<IOpFormat>,
|
||||
}
|
||||
|
||||
impl AsmIOpcode {
|
||||
pub fn from_opcode(opcode: IOpcode) -> Self {
|
||||
if let Some(alias) = IOP_LUT.hex.get(&opcode) {
|
||||
Self {
|
||||
opcode,
|
||||
format: Some(alias.clone()),
|
||||
}
|
||||
} else {
|
||||
Self {
|
||||
opcode,
|
||||
format: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn opcode(&self) -> IOpcode {
|
||||
self.opcode
|
||||
}
|
||||
|
||||
pub fn format(&self) -> Option<&IOpFormat> {
|
||||
self.format.as_ref()
|
||||
}
|
||||
pub fn has_imm(&self) -> bool {
|
||||
if let Some(alias) = self.format.as_ref() {
|
||||
alias.proto.imm != 0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AsmIOpcode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let name = if let Some(alias) = &self.format {
|
||||
&alias.name
|
||||
} else {
|
||||
&format!("IOP[0x{:x}]", self.opcode.0)
|
||||
};
|
||||
write!(f, "{name: <ASM_OPCODE_WIDTH$}")
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract AsmOpcode from IOpcode
|
||||
impl From<IOpcode> for AsmIOpcode {
|
||||
fn from(opcode: IOpcode) -> Self {
|
||||
if let Some(alias) = IOP_LUT.hex.get(&opcode) {
|
||||
Self {
|
||||
opcode,
|
||||
format: Some(alias.clone()),
|
||||
}
|
||||
} else {
|
||||
Self {
|
||||
opcode,
|
||||
format: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract AsmOpcode from IOp
|
||||
/// This is used from proper rendering in asm
|
||||
impl From<&IOp> for AsmIOpcode {
|
||||
fn from(iop: &IOp) -> Self {
|
||||
Self::from(iop.header.opcode)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for AsmIOpcode {
|
||||
type Err = ParsingError;
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref OPCODE_ARG_RE: regex::Regex = regex::Regex::new(
|
||||
r"(?<raw>^IOP\[((?<hex_val>0x[0-9a-fA-F]+)|(?<val>[0-9]+))\])|^(?<alias>\w+)"
|
||||
)
|
||||
.expect("Invalid regex");
|
||||
}
|
||||
|
||||
if let Some(caps) = OPCODE_ARG_RE.captures(s) {
|
||||
if let Some(_raw) = caps.name("raw") {
|
||||
let value = if let Some(raw_val) = caps.name("val") {
|
||||
raw_val
|
||||
.as_str()
|
||||
.parse::<u8>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
} else {
|
||||
// One of them must match, otherwise error will be arose before
|
||||
let raw_hex_val = caps.name("hex_val").unwrap();
|
||||
u8::from_str_radix(&raw_hex_val.as_str()[2..], 16)
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
};
|
||||
if (opcode::USER_RANGE_LB..=opcode::USER_RANGE_UB).contains(&value) {
|
||||
Ok(AsmIOpcode {
|
||||
opcode: IOpcode(value),
|
||||
format: None,
|
||||
})
|
||||
} else {
|
||||
Err(ParsingError::Opcode(value))
|
||||
}
|
||||
} else if let Some(alias) = caps.name("alias") {
|
||||
if let Some(alias) = IOP_LUT.asm.get(alias.as_str()) {
|
||||
Ok(AsmIOpcode {
|
||||
opcode: alias.opcode,
|
||||
format: Some(alias.clone()),
|
||||
})
|
||||
} else {
|
||||
Err(ParsingError::Opalias(alias.as_str().to_string()))
|
||||
}
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)))
|
||||
}
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&AsmIOpcode> for IOpcode {
|
||||
fn from(asm: &AsmIOpcode) -> Self {
|
||||
asm.opcode
|
||||
}
|
||||
}
|
||||
|
||||
/// Properties asm parsing utility
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Properties {
|
||||
fw_mode: FwMode,
|
||||
dst_align: OperandBlock,
|
||||
src_align: OperandBlock,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Properties {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let mode = match self.fw_mode {
|
||||
FwMode::Static => "",
|
||||
FwMode::Dynamic => "dyn ",
|
||||
};
|
||||
write!(
|
||||
f,
|
||||
"{}I{} I{}",
|
||||
mode,
|
||||
(self.dst_align.0 + 1) * MSG_WIDTH,
|
||||
(self.src_align.0 + 1) * MSG_WIDTH,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract properties from IOpHeader
|
||||
impl From<&IOpHeader> for Properties {
|
||||
fn from(value: &IOpHeader) -> Self {
|
||||
Self {
|
||||
fw_mode: value.fw_mode,
|
||||
dst_align: value.dst_align,
|
||||
src_align: value.src_align,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Properties {
|
||||
type Err = ParsingError;
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref PROPERTIES_ARG_RE: regex::Regex =
|
||||
regex::Regex::new(r"(?<fw>dyn)?\s*I(?<dst>\d+)\s*I(?<src>\d+)")
|
||||
.expect("Invalid regex");
|
||||
}
|
||||
|
||||
if let Some(caps) = PROPERTIES_ARG_RE.captures(s) {
|
||||
let fw_mode = if caps.name("fw").is_some() {
|
||||
FwMode::Dynamic
|
||||
} else {
|
||||
FwMode::Static
|
||||
};
|
||||
let src_width = caps["src"]
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
let src_align = OperandBlock::new((src_width / MSG_WIDTH as u16) as u8);
|
||||
let dst_width = caps["dst"]
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
let dst_align = OperandBlock::new((dst_width / MSG_WIDTH as u16) as u8);
|
||||
Ok(Properties {
|
||||
fw_mode,
|
||||
dst_align,
|
||||
src_align,
|
||||
})
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Operand {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
// Block/vec_size are zeroed indexed value
|
||||
// -> Transform them in one indexed for human readability
|
||||
let block = self.block.0 + 1;
|
||||
let vec_size = self.vec_size.0 + 1;
|
||||
if vec_size != 1 {
|
||||
write!(
|
||||
f,
|
||||
"I{}[{}]@0x{:0>2x}",
|
||||
block * MSG_WIDTH,
|
||||
vec_size,
|
||||
self.base_cid.0,
|
||||
)
|
||||
} else {
|
||||
write!(f, "I{}@0x{:0>2x}", block * MSG_WIDTH, self.base_cid.0,)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// OperandBundle
|
||||
// Addr are packed in <> in the ASM format and thus we only parse them by bundle
|
||||
impl std::fmt::Display for OperandBundle {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
self.iter()
|
||||
.fold(" ".to_string(), |acc, x| format!("{acc}{x} "))
|
||||
.trim()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for OperandBundle {
|
||||
type Err = ParsingError;
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref ADDR_ARG_RE: regex::Regex =
|
||||
regex::Regex::new(r"I(?<width>\d+)((?<vec>\[\s*(?<vec_len>\d+)\s*\]@(0x(?<vec_hex_cid>[0-9a-fA-F]+)|(?<vec_cid>\d+))\s*)|(?<single>@(0x(?<hex_cid>[0-9a-fA-F]+)|(?<cid>\d+))\s*))")
|
||||
.expect("Invalid regex");
|
||||
}
|
||||
let mut operands = ADDR_ARG_RE
|
||||
.captures_iter(s)
|
||||
.map(|caps| {
|
||||
let width = caps["width"]
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
let block = (width / MSG_WIDTH as u16) as u8;
|
||||
|
||||
if let Some(_vec) = caps.name("vec") {
|
||||
let base_cid = if let Some(raw_cid) = caps.name("vec_cid") {
|
||||
raw_cid
|
||||
.as_str()
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
} else {
|
||||
// One of them must match, otherwise error will be arose before
|
||||
let raw_hex_cid = caps.name("vec_hex_cid").unwrap();
|
||||
u16::from_str_radix(raw_hex_cid.as_str(), 16)
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
};
|
||||
let len = caps["vec_len"]
|
||||
.parse::<u8>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?;
|
||||
|
||||
Ok(Operand::new(block, base_cid, len, None))
|
||||
} else if let Some(_single) = caps.name("single") {
|
||||
let base_cid = if let Some(raw_cid) = caps.name("cid") {
|
||||
raw_cid
|
||||
.as_str()
|
||||
.parse::<u16>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
} else {
|
||||
// One of them must match, otherwise error will be arose before
|
||||
u16::from_str_radix(&caps["hex_cid"], 16)
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
};
|
||||
Ok(Operand::new(block, base_cid, 1, None))
|
||||
} else {
|
||||
return Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)));
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<_>, ParsingError>>()?;
|
||||
|
||||
// Empty OperandBundle is considered as parsing error
|
||||
if operands.is_empty() {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument: Empty OperandBundle {s}"
|
||||
)))
|
||||
} else {
|
||||
// Update is_last token
|
||||
operands.last_mut().unwrap().is_last = true;
|
||||
Ok(operands.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ImmBundle
|
||||
// Imm are packed in <> in the ASM format and thus we only parse them by bundle
|
||||
impl std::fmt::Display for ImmBundle {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
self.iter()
|
||||
.fold(" ".to_string(), |acc, x| format!(
|
||||
"{acc}0x{:x} ",
|
||||
x.cst_value()
|
||||
))
|
||||
.trim()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for ImmBundle {
|
||||
type Err = ParsingError;
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref IMM_ARG_RE: regex::Regex =
|
||||
regex::Regex::new(r"(0x(?<hex_imm>[0-9a-fA-F]+))|(?<imm>\d+)")
|
||||
.expect("Invalid regex");
|
||||
}
|
||||
let mut imms = IMM_ARG_RE
|
||||
.captures_iter(s)
|
||||
.map(|caps| {
|
||||
let imm = if let Some(raw_imm) = caps.name("imm") {
|
||||
raw_imm
|
||||
.as_str()
|
||||
.parse::<u128>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
} else {
|
||||
// One of them must match, otherwise error will be arose before
|
||||
let raw_hex_imm = caps.name("hex_imm").unwrap();
|
||||
u128::from_str_radix(raw_hex_imm.as_str(), 16)
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))?
|
||||
};
|
||||
|
||||
Ok(Immediat::from_cst(imm))
|
||||
})
|
||||
.collect::<Result<Vec<_>, ParsingError>>()?;
|
||||
|
||||
// Empty ImmBundle is considered as parsing error
|
||||
if imms.is_empty() {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)))
|
||||
} else {
|
||||
// Update is_last token
|
||||
imms.last_mut().unwrap().is_last = true;
|
||||
Ok(imms.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generic arguments
|
||||
/// Used to pack argument under the same type
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Arg {
|
||||
Opcode(AsmIOpcode),
|
||||
Properties(Properties),
|
||||
Operand(OperandBundle),
|
||||
Imm(ImmBundle),
|
||||
}
|
||||
|
||||
/// Use Display trait to convert into asm human readable file
|
||||
/// Simply defer to inner type display impl while forcing the display width
|
||||
impl std::fmt::Display for Arg {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Arg::Opcode(inner) => write!(f, "{inner}"),
|
||||
Arg::Properties(inner) => write!(f, "{inner}"),
|
||||
Arg::Operand(inner) => write!(f, "{inner}"),
|
||||
Arg::Imm(inner) => write!(f, "{inner}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Use FromStr trait to decode from asm file
|
||||
impl std::str::FromStr for Arg {
|
||||
type Err = ParsingError;
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match (
|
||||
OperandBundle::from_str(s),
|
||||
AsmIOpcode::from_str(s),
|
||||
Properties::from_str(s),
|
||||
ImmBundle::from_str(s),
|
||||
) {
|
||||
(Ok(operand), ..) => Ok(Self::Operand(operand)),
|
||||
(Err(_), Ok(opcode), ..) => Ok(Self::Opcode(opcode)),
|
||||
(Err(_), Err(_), Ok(props), ..) => Ok(Self::Properties(props)),
|
||||
(Err(_), Err(_), Err(_), Ok(imm)) => Ok(Self::Imm(imm)),
|
||||
(Err(addr), Err(opcode), Err(props), Err(imm)) => Err(ParsingError::Unmatch(format!(
|
||||
"{s}:
|
||||
Addr failed with{addr}
|
||||
Opcode failed with{opcode}
|
||||
Props failed with{props}
|
||||
Imm failed with{imm}
|
||||
"
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for IOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let opcode = AsmIOpcode::from(self);
|
||||
write!(f, "{opcode}")?;
|
||||
|
||||
let props = Properties::from(&self.header);
|
||||
write!(f, " <{props}>")?;
|
||||
|
||||
// Destination operands list
|
||||
write!(f, " <{}>", self.dst)?;
|
||||
|
||||
// Source operands list
|
||||
write!(f, " <{}>", self.src)?;
|
||||
|
||||
// Immediat operands list [Optional]
|
||||
if self.header.has_imm {
|
||||
write!(f, " <{}>", self.imm)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Use FromStr trait to decode from asm file
|
||||
impl std::str::FromStr for IOp {
|
||||
type Err = ParsingError;
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref IOP_RE: regex::Regex = regex::Regex::new(
|
||||
r"^(?<opcode>\S+)\s*(?<props><.*?>)\s*(?<dst><.*?>)\s*(?<src><.*?>)\s*(?<imm><.*?>)?"
|
||||
)
|
||||
.expect("Invalid regex");
|
||||
}
|
||||
|
||||
if let Some(caps) = IOP_RE.captures(s) {
|
||||
let opcode = AsmIOpcode::from_str(caps["opcode"].trim_matches(['<', '>', ' ']))?;
|
||||
let props = Properties::from_str(caps["props"].trim_matches(['<', '>', ' ']))?;
|
||||
let dst = {
|
||||
let mut bundle =
|
||||
OperandBundle::from_str(caps["dst"].trim_matches(['<', '>', ' ']))?;
|
||||
bundle.set_kind(OperandKind::Dst);
|
||||
bundle
|
||||
};
|
||||
let src = {
|
||||
let mut bundle =
|
||||
OperandBundle::from_str(caps["src"].trim_matches(['<', '>', ' ']))?;
|
||||
bundle.set_kind(OperandKind::Src);
|
||||
bundle
|
||||
};
|
||||
let (imm, has_imm) = if let Some(imm) = caps.name("imm") {
|
||||
(
|
||||
ImmBundle::from_str(imm.as_str().trim_matches(['<', '>', ' ']))?,
|
||||
true,
|
||||
)
|
||||
} else {
|
||||
(ImmBundle::from(vec![]), false)
|
||||
};
|
||||
|
||||
// Aggregate some fields together to build real IOp
|
||||
let header = IOpHeader {
|
||||
fw_mode: props.fw_mode,
|
||||
has_imm,
|
||||
opcode: opcode.opcode,
|
||||
dst_align: props.dst_align,
|
||||
src_align: props.src_align,
|
||||
};
|
||||
|
||||
Ok(IOp {
|
||||
header,
|
||||
dst,
|
||||
src,
|
||||
imm,
|
||||
})
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid argument format {s}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
540
backends/tfhe-hpu-backend/src/asm/iop/field.rs
Normal file
540
backends/tfhe-hpu-backend/src/asm/iop/field.rs
Normal file
@@ -0,0 +1,540 @@
|
||||
//! List of IOp field
|
||||
//! Mainly thin wrapper over basic type to enforce correct used of asm fields
|
||||
use super::*;
|
||||
use crate::asm::CtId;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Parsing error
|
||||
#[derive(Error, Debug, Clone)]
|
||||
pub enum HexParsingError {
|
||||
#[error("Invalid header")]
|
||||
Header,
|
||||
#[error("Invalid Operand Kind: {0}")]
|
||||
Kind(String),
|
||||
#[error("Invalid operand blocks")]
|
||||
Block,
|
||||
#[error("Incomplete stream")]
|
||||
EmptyStream,
|
||||
}
|
||||
|
||||
// Vectorized ciphertext operands
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
/// Kind of an operand word
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OperandKind {
    /// Source operand
    Src = 0x0,
    /// Destination operand
    Dst = 0x1,
    /// Immediat operand
    Imm = 0x2,
    /// Kind not known yet
    Unknown = 0x3,
}
|
||||
|
||||
/// VectorSize
/// => Number of operands defined in the operands block, stored as `len - 1`
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct VectorSize(pub u8);

impl VectorSize {
    /// Create a vector size from a one-indexed length.
    ///
    /// # Panics
    /// Panics when `len` is zero: an empty vector has no encoding.
    pub fn new(len: u8) -> Self {
        if len == 0 {
            panic!("Empty vector couldn't be encoded");
        }
        Self(len - 1)
    }
}
|
||||
|
||||
/// OperandBlock
/// => Number of valid digits in each operand block, stored as `width - 1`
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct OperandBlock(pub u8);

impl OperandBlock {
    /// Create an operand block from a one-indexed number of blocks.
    ///
    /// # Panics
    /// Panics when `width` is zero: an empty block has no encoding.
    pub fn new(width: u8) -> Self {
        if width == 0 {
            panic!("Empty block couldn't be encoded");
        }
        Self(width - 1)
    }
}
|
||||
|
||||
/// Ciphertext vectorized operands with extra parsing flags
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct Operand {
|
||||
pub base_cid: CtId,
|
||||
pub block: OperandBlock,
|
||||
pub vec_size: VectorSize,
|
||||
pub is_last: bool,
|
||||
pub kind: OperandKind,
|
||||
}
|
||||
|
||||
impl Operand {
|
||||
pub(crate) fn new(block: u8, base_cid: u16, vec_size: u8, kind: Option<OperandKind>) -> Self {
|
||||
Self {
|
||||
kind: kind.unwrap_or(OperandKind::Unknown),
|
||||
is_last: false,
|
||||
vec_size: VectorSize::new(vec_size),
|
||||
block: OperandBlock::new(block),
|
||||
base_cid: CtId(base_cid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a dedicated type for a collection of Immediat
|
||||
/// This is to enable trait implementation on it (c.f arg)
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OperandBundle(Vec<Operand>);
|
||||
|
||||
impl OperandBundle {
|
||||
pub(crate) fn set_kind(&mut self, kind: OperandKind) {
|
||||
assert!(
|
||||
kind != OperandKind::Imm,
|
||||
"OperandBundle couldn't be tagged as Imm"
|
||||
);
|
||||
self.0.iter_mut().for_each(|op| op.kind = kind);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Operand>> for OperandBundle {
|
||||
fn from(inner: Vec<Operand>) -> Self {
|
||||
let mut inner = inner;
|
||||
// Enforce correct is_last handling
|
||||
inner.iter_mut().for_each(|op| op.is_last = false);
|
||||
if let Some(last) = inner.last_mut() {
|
||||
last.is_last = true;
|
||||
}
|
||||
Self(inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for OperandBundle {
|
||||
type Target = Vec<Operand>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl OperandBundle {
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
pub fn from_words(stream: &[IOpWordRepr]) -> Result<(Self, usize), HexParsingError> {
|
||||
// Keep track of the current peak index
|
||||
let mut peak_words = 0;
|
||||
|
||||
let mut op_list = Vec::new();
|
||||
loop {
|
||||
let op = if let Some(op_word) = stream.get(peak_words) {
|
||||
peak_words += 1;
|
||||
Operand::from(&fmt::OperandHex::from_bits(*op_word))
|
||||
} else {
|
||||
return Err(HexParsingError::EmptyStream);
|
||||
};
|
||||
op_list.push(op);
|
||||
if op.is_last {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok((Self(op_list), peak_words))
|
||||
}
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
pub fn to_words(&self) -> Vec<IOpWordRepr> {
|
||||
self.0
|
||||
.iter()
|
||||
.map(|op| fmt::OperandHex::from(op).into_bits())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
// Immediate operands
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
/// Immediat size
/// => Number of valid digits in the following immediat
/// To obtain the number of valid bits, multiply it by the msg_width
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ImmBlock(pub u16);
|
||||
|
||||
/// Immediat header
|
||||
/// Use to implement top-level parser manually
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct ImmediatHeader {
|
||||
pub(super) lsb_msg: u16,
|
||||
pub(super) block: ImmBlock,
|
||||
pub(super) is_last: bool,
|
||||
pub(super) kind: OperandKind,
|
||||
}
|
||||
|
||||
/// Full Immediat representation (i.e. header + data)
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Immediat {
|
||||
pub(super) kind: OperandKind,
|
||||
pub(super) is_last: bool,
|
||||
pub(super) block: ImmBlock,
|
||||
pub(super) msg: Vec<u16>,
|
||||
}
|
||||
|
||||
impl Immediat {
|
||||
/// Access imm msg for template patching
|
||||
/// Extract the correct block (i.e. MSG_WIDTH chunk)
|
||||
pub fn msg_block(&self, bid: u8) -> u16 {
|
||||
let word_id = bid as u32 / (u16::BITS / MSG_WIDTH as u32);
|
||||
let block_id = bid as u32 % (u16::BITS / MSG_WIDTH as u32);
|
||||
if let Some(word) = self.msg.get(word_id as usize) {
|
||||
(word >> (block_id * MSG_WIDTH as u32)) & ((1 << MSG_WIDTH) - 1)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_cst(cst: u128) -> Self {
|
||||
let mut u16_cst = cst
|
||||
.to_le_bytes()
|
||||
.chunks(2)
|
||||
.map(|x| u16::from_le_bytes(x.try_into().unwrap()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut cst = cst;
|
||||
let block = {
|
||||
let mut block = 0;
|
||||
while cst != 0 {
|
||||
block += 1;
|
||||
cst >>= 2;
|
||||
}
|
||||
ImmBlock(block)
|
||||
};
|
||||
|
||||
// Shrink to fit
|
||||
let msg_word = usize::div_ceil(block.0 as usize * MSG_WIDTH as usize, u16::BITS as usize);
|
||||
u16_cst.resize(msg_word, 0);
|
||||
|
||||
Self {
|
||||
kind: OperandKind::Imm,
|
||||
is_last: false,
|
||||
block,
|
||||
msg: u16_cst,
|
||||
}
|
||||
}
|
||||
pub fn cst_value(&self) -> u128 {
|
||||
self.msg
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(pos, val)| (*val as u128) << (8 * std::mem::size_of::<u16>() * pos))
|
||||
.sum::<u128>()
|
||||
}
|
||||
}
|
||||
|
||||
impl Immediat {
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
pub fn from_words(stream: &[IOpWordRepr]) -> Result<(Self, usize), HexParsingError> {
|
||||
// Keep track of the current peak index
|
||||
let mut peak_words = 0;
|
||||
|
||||
// 1. Parse header
|
||||
let header = if let Some(header_word) = stream.get(peak_words) {
|
||||
peak_words += 1;
|
||||
ImmediatHeader::from(&fmt::ImmediatHeaderHex::from_bits(*header_word))
|
||||
} else {
|
||||
return Err(HexParsingError::EmptyStream);
|
||||
};
|
||||
|
||||
// Check flags
|
||||
if header.kind != OperandKind::Imm {
|
||||
return Err(HexParsingError::Kind(format!(
|
||||
"Get {:?} instead of {:?}",
|
||||
header.kind,
|
||||
OperandKind::Imm
|
||||
)));
|
||||
}
|
||||
|
||||
// Get associated value:
|
||||
let mut le_msg = vec![header.lsb_msg];
|
||||
|
||||
let data_word = usize::div_ceil(
|
||||
header.block.0 as usize * MSG_WIDTH as usize,
|
||||
8 * (std::mem::size_of::<IOpWordRepr>() / std::mem::size_of::<u16>()),
|
||||
);
|
||||
|
||||
// NB: First imm word is encoded in the header
|
||||
for _w in 0..(data_word / 2) {
|
||||
if let Some(word) = stream.get(peak_words) {
|
||||
peak_words += 1;
|
||||
let u16_words = word
|
||||
.to_le_bytes()
|
||||
.chunks(2)
|
||||
.map(|x| u16::from_le_bytes(x.try_into().unwrap()))
|
||||
.collect::<Vec<_>>();
|
||||
le_msg.extend_from_slice(u16_words.as_slice());
|
||||
} else {
|
||||
return Err(HexParsingError::EmptyStream);
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
kind: header.kind,
|
||||
is_last: header.is_last,
|
||||
block: header.block,
|
||||
msg: le_msg,
|
||||
},
|
||||
peak_words,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn to_words(&self) -> Vec<IOpWordRepr> {
|
||||
let mut words = Vec::new();
|
||||
let header = ImmediatHeader {
|
||||
lsb_msg: *self.msg.first().unwrap_or(&0),
|
||||
block: self.block,
|
||||
is_last: self.is_last,
|
||||
kind: self.kind,
|
||||
};
|
||||
words.push(fmt::ImmediatHeaderHex::from(&header).into_bits());
|
||||
|
||||
if self.msg.len() > 1 {
|
||||
for imm in self.msg[1..]
|
||||
.chunks(std::mem::size_of::<IOpWordRepr>() / std::mem::size_of_val(&self.msg[0]))
|
||||
{
|
||||
let imm_word = match imm.len() {
|
||||
1 => IOpWordRepr::from(imm[0]),
|
||||
2 => IOpWordRepr::from(
|
||||
imm[0] as IOpWordRepr + ((imm[1] as IOpWordRepr) << u16::BITS),
|
||||
),
|
||||
_ => panic!("Unsupported chunks, IOpWordRepr has been changed"),
|
||||
};
|
||||
words.push(imm_word);
|
||||
}
|
||||
}
|
||||
words
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a dedicated type for a collection of Immediat
|
||||
/// This is to enable trait implementation on it (c.f arg)
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ImmBundle(Vec<Immediat>);
|
||||
|
||||
impl ImmBundle {
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
pub fn from_words(stream: &[IOpWordRepr]) -> Result<(Self, usize), HexParsingError> {
|
||||
// Keep track of the current peak index
|
||||
let mut peak_words = 0;
|
||||
|
||||
let mut imm_list = Vec::new();
|
||||
loop {
|
||||
let (imm, peaked) = Immediat::from_words(&stream[peak_words..])?;
|
||||
peak_words += peaked;
|
||||
|
||||
let is_last = imm.is_last;
|
||||
imm_list.push(imm);
|
||||
if is_last {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok((Self(imm_list), peak_words))
|
||||
}
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
pub fn to_words(&self) -> Vec<IOpWordRepr> {
|
||||
self.0
|
||||
.iter()
|
||||
.flat_map(|imm| imm.to_words())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Immediat>> for ImmBundle {
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
fn from(inner: Vec<Immediat>) -> Self {
|
||||
let mut inner = inner;
|
||||
// Enforce correct is_last handling
|
||||
inner.iter_mut().for_each(|op| op.is_last = false);
|
||||
if let Some(last) = inner.last_mut() {
|
||||
last.is_last = true;
|
||||
}
|
||||
Self(inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for ImmBundle {
|
||||
type Target = Vec<Immediat>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
// IOp header
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
/// Opcode
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
|
||||
pub struct IOpcode(pub u8);
|
||||
|
||||
/// Firmware mode flag carried in the IOp header.
///
/// `IOp::new` always builds headers in `Static` mode.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FwMode {
    Static = 0x0,
    Dynamic = 0x1,
}
|
||||
|
||||
/// IOpHeader
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct IOpHeader {
|
||||
pub(super) src_align: OperandBlock,
|
||||
pub(super) dst_align: OperandBlock,
|
||||
pub(super) opcode: IOpcode,
|
||||
pub(super) has_imm: bool,
|
||||
pub(super) fw_mode: FwMode,
|
||||
}
|
||||
|
||||
/// Gather all subparts together
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IOp {
|
||||
pub(super) header: IOpHeader,
|
||||
pub(super) dst: OperandBundle,
|
||||
pub(super) src: OperandBundle,
|
||||
pub(super) imm: ImmBundle,
|
||||
}
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Implement construction
|
||||
/// Used to construct IOp from Backend HpuVar
|
||||
impl IOp {
|
||||
pub fn new(opcode: IOpcode, dst: Vec<Operand>, src: Vec<Operand>, imm: Vec<Immediat>) -> Self {
|
||||
let dst_align = dst.iter().map(|x| x.block).max().unwrap();
|
||||
let src_align = src.iter().map(|x| x.block).max().unwrap();
|
||||
let has_imm = !imm.is_empty();
|
||||
|
||||
let header = IOpHeader {
|
||||
src_align,
|
||||
dst_align,
|
||||
opcode,
|
||||
has_imm,
|
||||
fw_mode: FwMode::Static,
|
||||
};
|
||||
Self {
|
||||
header,
|
||||
dst: dst.into(),
|
||||
src: src.into(),
|
||||
imm: imm.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn opcode(&self) -> IOpcode {
|
||||
self.header.opcode
|
||||
}
|
||||
pub fn asm_opcode(&self) -> AsmIOpcode {
|
||||
self.header.opcode.into()
|
||||
}
|
||||
|
||||
// Compute associated fw block size
|
||||
// Used to compute fw_entry offset and fw translation validity
|
||||
pub fn fw_blk_width(&self) -> usize {
|
||||
std::cmp::max(self.header.dst_align.0, self.header.src_align.0) as usize
|
||||
}
|
||||
|
||||
// Compute fw table entry
|
||||
pub fn fw_entry(&self) -> usize {
|
||||
self.fw_blk_width() * 0x100 + self.header.opcode.0 as usize
|
||||
}
|
||||
pub fn dst(&self) -> &OperandBundle {
|
||||
&self.dst
|
||||
}
|
||||
pub fn src(&self) -> &OperandBundle {
|
||||
&self.src
|
||||
}
|
||||
pub fn imm(&self) -> &ImmBundle {
|
||||
&self.imm
|
||||
}
|
||||
}
|
||||
/// Implement parsing logic from stream of word
|
||||
/// Only consume the VecDeque on Success
|
||||
impl IOp {
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
pub fn from_words(stream: &mut VecDeque<IOpWordRepr>) -> Result<Self, HexParsingError> {
|
||||
// Keep track of the current peak index
|
||||
let mut peak_words = 0;
|
||||
|
||||
// Enforce contiguous for ease of addressing in the queue
|
||||
stream.make_contiguous();
|
||||
|
||||
// 1. Parse header
|
||||
let header = if let Some(header_word) = stream.get(peak_words) {
|
||||
peak_words += 1;
|
||||
IOpHeader::from(&fmt::IOpHeaderHex::from(*header_word))
|
||||
} else {
|
||||
return Err(HexParsingError::EmptyStream);
|
||||
};
|
||||
|
||||
// 2. Parse Destination operands
|
||||
let dst = {
|
||||
let (dst, peaked) = OperandBundle::from_words(&stream.as_slices().0[peak_words..])?;
|
||||
for op in dst.iter() {
|
||||
// Check flags
|
||||
if op.kind != OperandKind::Dst {
|
||||
return Err(HexParsingError::Kind(format!(
|
||||
"Get {:?} instead of {:?}",
|
||||
op.kind,
|
||||
OperandKind::Dst
|
||||
)));
|
||||
}
|
||||
if op.block > header.dst_align {
|
||||
return Err(HexParsingError::Kind(format!(
|
||||
"Get {:?} > {:?}",
|
||||
op.block, header.dst_align
|
||||
)));
|
||||
}
|
||||
}
|
||||
peak_words += peaked;
|
||||
dst
|
||||
};
|
||||
|
||||
// 3. Parse Source operands
|
||||
let src = {
|
||||
let (src, peaked) = OperandBundle::from_words(&stream.as_slices().0[peak_words..])?;
|
||||
for op in src.iter() {
|
||||
// Check flags
|
||||
if op.kind != OperandKind::Src {
|
||||
return Err(HexParsingError::Kind(format!(
|
||||
"Get {:?} instead of {:?}",
|
||||
op.kind,
|
||||
OperandKind::Src
|
||||
)));
|
||||
}
|
||||
if op.block > header.src_align {
|
||||
return Err(HexParsingError::Kind(format!(
|
||||
"Get {:?} > {:?}",
|
||||
op.block, header.src_align
|
||||
)));
|
||||
}
|
||||
}
|
||||
peak_words += peaked;
|
||||
src
|
||||
};
|
||||
|
||||
// 4. Parse Immediat [Optional]
|
||||
let (imm, peaked) = if header.has_imm {
|
||||
ImmBundle::from_words(&stream.as_slices().0[peak_words..])?
|
||||
} else {
|
||||
(ImmBundle(Vec::new()), 0)
|
||||
};
|
||||
peak_words += peaked;
|
||||
|
||||
// Successful extraction from the dequeue
|
||||
// Consume the associated words
|
||||
stream.drain(0..peak_words);
|
||||
|
||||
Ok(Self {
|
||||
header,
|
||||
dst,
|
||||
src,
|
||||
imm,
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", ret)]
|
||||
pub fn to_words(&self) -> Vec<IOpWordRepr> {
|
||||
let mut words = Vec::new();
|
||||
// 1. Header
|
||||
words.push(fmt::IOpHeaderHex::from(&self.header).into_bits());
|
||||
// 2. Destination
|
||||
words.extend(self.dst.to_words());
|
||||
// 3. Sources
|
||||
words.extend(self.src.to_words());
|
||||
// 4. Immediat
|
||||
words.extend(self.imm.to_words());
|
||||
words
|
||||
}
|
||||
}
|
||||
154
backends/tfhe-hpu-backend/src/asm/iop/fmt.rs
Normal file
154
backends/tfhe-hpu-backend/src/asm/iop/fmt.rs
Normal file
@@ -0,0 +1,154 @@
|
||||
//!
|
||||
//! Define binary format encoding of IOp instructions
|
||||
//! Rely on `bitfield_struct` crate to define bit-accurate insn format
|
||||
//! and some manual From/To implementation to move to internal type
|
||||
use crate::asm::CtId;
|
||||
use bitfield_struct::bitfield;
|
||||
|
||||
use super::*;
|
||||
|
||||
// Type aliases for the underlying native types.
// NB: bitfield does not currently support type aliases, so the bitfield definitions below
// spell out the native type instead.
/// One word of an encoded IOp.
pub type IOpWordRepr = u32;
/// A complete encoded IOp, as a sequence of words.
pub type IOpRepr = Vec<IOpWordRepr>;
|
||||
|
||||
#[bitfield(u32)]
|
||||
pub struct OperandHex {
|
||||
#[bits(16)]
|
||||
base_cid: u16,
|
||||
#[bits(8)]
|
||||
block: u8,
|
||||
#[bits(5)]
|
||||
vec_size: u8,
|
||||
#[bits(1)]
|
||||
is_last: bool,
|
||||
#[bits(2)]
|
||||
kind: u8,
|
||||
}
|
||||
|
||||
impl From<&OperandHex> for field::Operand {
|
||||
fn from(value: &OperandHex) -> Self {
|
||||
let kind = if value.kind() == OperandKind::Src as u8 {
|
||||
OperandKind::Src
|
||||
} else if value.kind() == OperandKind::Dst as u8 {
|
||||
OperandKind::Dst
|
||||
} else if value.kind() == OperandKind::Imm as u8 {
|
||||
OperandKind::Imm
|
||||
} else {
|
||||
OperandKind::Unknown
|
||||
};
|
||||
|
||||
Self {
|
||||
base_cid: CtId(value.base_cid()),
|
||||
block: field::OperandBlock(value.block()),
|
||||
vec_size: field::VectorSize(value.vec_size()),
|
||||
is_last: value.is_last(),
|
||||
kind,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&Operand> for OperandHex {
|
||||
fn from(value: &Operand) -> Self {
|
||||
Self::new()
|
||||
.with_base_cid(value.base_cid.0)
|
||||
.with_block(value.block.0)
|
||||
.with_vec_size(value.vec_size.0)
|
||||
.with_is_last(value.is_last)
|
||||
.with_kind(value.kind as u8)
|
||||
}
|
||||
}
|
||||
|
||||
#[bitfield(u32)]
|
||||
pub struct ImmediatHeaderHex {
|
||||
#[bits(16)]
|
||||
lsb_msg: u16,
|
||||
#[bits(12)]
|
||||
block: u16,
|
||||
#[bits(1)]
|
||||
is_last: bool,
|
||||
#[bits(1)]
|
||||
_reserved: u8,
|
||||
#[bits(2)]
|
||||
kind: u8,
|
||||
}
|
||||
|
||||
impl From<&ImmediatHeaderHex> for field::ImmediatHeader {
|
||||
fn from(value: &ImmediatHeaderHex) -> Self {
|
||||
let kind = if value.kind() == OperandKind::Src as u8 {
|
||||
OperandKind::Src
|
||||
} else if value.kind() == OperandKind::Dst as u8 {
|
||||
OperandKind::Dst
|
||||
} else if value.kind() == OperandKind::Imm as u8 {
|
||||
OperandKind::Imm
|
||||
} else {
|
||||
OperandKind::Unknown
|
||||
};
|
||||
|
||||
Self {
|
||||
lsb_msg: value.lsb_msg(),
|
||||
block: field::ImmBlock(value.block()),
|
||||
is_last: value.is_last(),
|
||||
kind,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&field::ImmediatHeader> for ImmediatHeaderHex {
    /// Encode an immediate header into its word representation (reserved bit left at 0).
    fn from(value: &field::ImmediatHeader) -> Self {
        let field::ImmediatHeader {
            lsb_msg,
            block,
            is_last,
            kind,
        } = *value;

        Self::new()
            .with_lsb_msg(lsb_msg)
            .with_block(block.0)
            .with_is_last(is_last)
            .with_kind(kind as u8)
    }
}
|
||||
|
||||
#[bitfield(u32)]
|
||||
pub struct IOpHeaderHex {
|
||||
#[bits(8)]
|
||||
src_align: u8,
|
||||
#[bits(8)]
|
||||
dst_align: u8,
|
||||
#[bits(8)]
|
||||
opcode: u8,
|
||||
#[bits(1)]
|
||||
has_imm: bool,
|
||||
#[bits(1)]
|
||||
fw_mode: bool,
|
||||
#[bits(6)]
|
||||
_reserved: u8,
|
||||
}
|
||||
|
||||
impl From<&IOpHeaderHex> for field::IOpHeader {
    /// Decode an IOp header word into its internal representation.
    fn from(value: &IOpHeaderHex) -> Self {
        let fw_mode = if value.fw_mode() {
            field::FwMode::Dynamic
        } else {
            field::FwMode::Static
        };

        Self {
            src_align: field::OperandBlock(value.src_align()),
            dst_align: field::OperandBlock(value.dst_align()),
            opcode: field::IOpcode(value.opcode()),
            has_imm: value.has_imm(),
            fw_mode,
        }
    }
}
|
||||
|
||||
impl From<&field::IOpHeader> for IOpHeaderHex {
    /// Encode an IOp header into its word representation (reserved bits left at 0).
    fn from(value: &field::IOpHeader) -> Self {
        let field::IOpHeader {
            src_align,
            dst_align,
            opcode,
            has_imm,
            fw_mode,
        } = *value;
        // The single fw_mode bit is set for Dynamic and cleared for Static.
        let is_dynamic = matches!(fw_mode, field::FwMode::Dynamic);

        Self::new()
            .with_src_align(src_align.0)
            .with_dst_align(dst_align.0)
            .with_opcode(opcode.0)
            .with_has_imm(has_imm)
            .with_fw_mode(is_dynamic)
    }
}
|
||||
61
backends/tfhe-hpu-backend/src/asm/iop/iop_macro.rs
Normal file
61
backends/tfhe-hpu-backend/src/asm/iop/iop_macro.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
//! IOp mapping
//!
//! All IOps currently share a single format.
//! Some of them (the upper 128 opcodes) are handled by the firmware and are named; the others
//! are reserved for custom user entries.
|
||||
|
||||
/// Declare the set of known IOps.
///
/// Each entry has the form `[PROTO -> "NAME", opcode]`. The expansion defines:
/// * `IOpFromArg` and the lazily-built `IOP_LUT` lookup tables (by name and by opcode),
/// * one lazily-built `IOP_<NAME>` constant per entry,
/// * `IOP_LIST`, the list of all entries in declaration order.
#[macro_export]
macro_rules! iop {
    (
        $([ $proto: ident -> $asm: literal, $opcode: expr] $(,)?)*
    ) => {
        ::paste::paste! {
            /// Parser utilities
            /// Hashmap for Name -> to (Opcode, (src, imm, dst))
            pub(crate) struct IOpFromArg {
                pub(crate) asm: HashMap<String, IOpFormat>,
                pub(crate) hex: HashMap<IOpcode, IOpFormat>,
            }

            // Lookup tables, filled once on first access: one keyed by the upper-cased
            // name, one keyed by the opcode.
            lazy_static! {
                pub(crate) static ref IOP_LUT: IOpFromArg = {

                    let mut iop_from_arg = IOpFromArg{
                        asm: HashMap::new(),
                        hex: HashMap::new(),
                    };

                    $(
                        let iop_format = IOpFormat{
                            name: stringify!([< $asm:upper >]).to_string(),
                            opcode: IOpcode($opcode),
                            proto: $proto.clone().into()
                        };
                        iop_from_arg.asm.insert(stringify!([< $asm:upper >]).to_string(), iop_format.clone());
                        iop_from_arg.hex.insert(IOpcode($opcode), iop_format);
                    )*
                    iop_from_arg
                };
            }

            // Export each AsmIOpCode as constant
            $(
                lazy_static! {
                    pub static ref [< IOP_ $asm:upper >]: AsmIOpcode = {
                        AsmIOpcode{opcode: IOpcode($opcode), format: Some(IOpFormat{
                            name: stringify!([< $asm:upper >]).to_string(),
                            opcode: IOpcode($opcode),
                            proto: $proto.clone().into()
                        })}
                    };
                }
            )*

            // Every declared IOp, in declaration order.
            lazy_static! {
                pub static ref IOP_LIST: Vec<AsmIOpcode> = vec![ $(AsmIOpcode{opcode: IOpcode($opcode), format: Some(IOpFormat{
                    name: stringify!([< $asm:upper >]).to_string(),
                    opcode: IOpcode($opcode),
                    proto: $proto.clone().into()
                })},)*];
            }
        }
    }
}
|
||||
184
backends/tfhe-hpu-backend/src/asm/iop/mod.rs
Normal file
184
backends/tfhe-hpu-backend/src/asm/iop/mod.rs
Normal file
@@ -0,0 +1,184 @@
|
||||
//!
|
||||
//! IOp definition
|
||||
|
||||
mod field;
|
||||
pub use field::{HexParsingError, IOp, IOpcode, Immediat, Operand, OperandKind};
|
||||
mod fmt;
|
||||
pub use fmt::{IOpRepr, IOpWordRepr};
|
||||
mod iop_macro;
|
||||
pub mod opcode;
|
||||
|
||||
mod arg;
|
||||
pub use arg::{AsmIOpcode, ParsingError};
|
||||
|
||||
// TODO: find a proper way to make these runtime properties
/// Width, in bits, of one message digit.
pub const MSG_WIDTH: u8 = 2;
/// Carry width (presumably in bits, like `MSG_WIDTH` - confirm).
pub const CARRY_WIDTH: u8 = 2;
|
||||
|
||||
/// Enum used to define a variable size relative to current integer width
|
||||
#[derive(Debug, Eq, PartialEq, Clone, Copy, serde::Serialize, serde::Deserialize)]
|
||||
pub enum VarMode {
|
||||
Native,
|
||||
Half,
|
||||
Bool,
|
||||
}
|
||||
|
||||
/// Implement FromString trait to enable parsing from CLI
|
||||
impl std::str::FromStr for VarMode {
|
||||
type Err = ParsingError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"n" | "nat" | "native" => Ok(VarMode::Native),
|
||||
"h" | "half" => Ok(VarMode::Half),
|
||||
"b" | "bool" => Ok(VarMode::Bool),
|
||||
_ => Err(ParsingError::InvalidArg(format!("Invalid VarMode: {s}"))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Struct used to depict IOp prototype with clarity
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ConstIOpProto<const D: usize, const S: usize> {
|
||||
pub dst: [VarMode; D],
|
||||
pub src: [VarMode; S],
|
||||
pub imm: usize,
|
||||
}
|
||||
|
||||
/// Dynamic type to erase const template
|
||||
// TODO moved from runtime check to compile time one
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct IOpProto {
|
||||
pub dst: Vec<VarMode>,
|
||||
pub src: Vec<VarMode>,
|
||||
pub imm: usize,
|
||||
}
|
||||
|
||||
impl<const D: usize, const S: usize> From<ConstIOpProto<D, S>> for IOpProto {
|
||||
fn from(const_val: ConstIOpProto<D, S>) -> Self {
|
||||
Self {
|
||||
dst: const_val.dst.into(),
|
||||
src: const_val.src.into(),
|
||||
imm: const_val.imm,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement FromString trait to enable parsing from CLI
|
||||
impl std::str::FromStr for IOpProto {
|
||||
type Err = ParsingError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
lazy_static! {
|
||||
static ref PROTO_ARG_RE: regex::Regex =
|
||||
regex::Regex::new(r"<(?<dst>[\w\s,]+)>::<(?<src>[\w\s,]*)><(?<imm>\d+)>")
|
||||
.expect("Invalid regex");
|
||||
}
|
||||
if let Some(caps) = PROTO_ARG_RE.captures(s) {
|
||||
let dst = if let Some(dst_raw) = caps.name("dst") {
|
||||
dst_raw
|
||||
.as_str()
|
||||
.split(',')
|
||||
.map(|x| x.trim().parse())
|
||||
.collect::<Result<Vec<VarMode>, ParsingError>>()
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(
|
||||
"Invalid IOpProto: Missing dst field (e.g. <Native, Bool>".to_string(),
|
||||
))
|
||||
}?;
|
||||
|
||||
let src = if let Some(src_raw) = caps.name("src") {
|
||||
src_raw
|
||||
.as_str()
|
||||
.split(',')
|
||||
.map(|x| x.trim().parse())
|
||||
.collect::<Result<Vec<VarMode>, ParsingError>>()
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(
|
||||
"Invalid IOpProto: Missing src field (e.g. <Native, Half, Bool, ...>"
|
||||
.to_string(),
|
||||
))
|
||||
}?;
|
||||
let imm = if let Some(imm_raw) = caps.name("imm") {
|
||||
imm_raw
|
||||
.as_str()
|
||||
.parse::<usize>()
|
||||
.map_err(|err| ParsingError::InvalidArg(err.to_string()))
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(
|
||||
"Invalid IOpProto: Missing imm field (e.g. <2>".to_string(),
|
||||
))
|
||||
}?;
|
||||
|
||||
Ok(IOpProto { dst, src, imm })
|
||||
} else {
|
||||
Err(ParsingError::Unmatch(format!(
|
||||
"Invalid IOpProto format {s}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Define some common iop format
|
||||
pub const IOP_CT_F_CT: ConstIOpProto<1, 1> = ConstIOpProto {
|
||||
dst: [VarMode::Native; 1],
|
||||
src: [VarMode::Native; 1],
|
||||
imm: 0,
|
||||
};
|
||||
pub const IOP_CT_F_2CT: ConstIOpProto<1, 2> = ConstIOpProto {
|
||||
dst: [VarMode::Native; 1],
|
||||
src: [VarMode::Native; 2],
|
||||
imm: 0,
|
||||
};
|
||||
pub const IOP_CT_F_2CT_BOOL: ConstIOpProto<1, 3> = ConstIOpProto {
|
||||
dst: [VarMode::Native; 1],
|
||||
src: [VarMode::Native, VarMode::Native, VarMode::Bool],
|
||||
imm: 0,
|
||||
};
|
||||
pub const IOP_CT_F_CT_BOOL: ConstIOpProto<1, 2> = ConstIOpProto {
|
||||
dst: [VarMode::Native; 1],
|
||||
src: [VarMode::Native, VarMode::Bool],
|
||||
imm: 0,
|
||||
};
|
||||
pub const IOP_CT_F_CT_SCALAR: ConstIOpProto<1, 1> = ConstIOpProto {
|
||||
dst: [VarMode::Native; 1],
|
||||
src: [VarMode::Native; 1],
|
||||
imm: 1,
|
||||
};
|
||||
pub const IOP_CMP: ConstIOpProto<1, 2> = ConstIOpProto {
|
||||
dst: [VarMode::Bool; 1],
|
||||
src: [VarMode::Native; 2],
|
||||
imm: 0,
|
||||
};
|
||||
pub const IOP_2CT_F_3CT: ConstIOpProto<2, 3> = ConstIOpProto {
|
||||
dst: [VarMode::Native; 2],
|
||||
src: [VarMode::Native; 3],
|
||||
imm: 0,
|
||||
};
|
||||
|
||||
use crate::iop;
|
||||
use arg::IOpFormat;
|
||||
use lazy_static::lazy_static;
|
||||
use std::collections::HashMap;
|
||||
// Known IOps: prototype, assembly name and opcode.
// NB: the declaration order is the order of `IOP_LIST`.
iop!(
    // Ct x Imm
    [IOP_CT_F_CT_SCALAR -> "ADDS", opcode::ADDS],
    [IOP_CT_F_CT_SCALAR -> "SUBS", opcode::SUBS],
    [IOP_CT_F_CT_SCALAR -> "SSUB", opcode::SSUB],
    [IOP_CT_F_CT_SCALAR -> "MULS", opcode::MULS],
    // Ct x Ct: arithmetic
    [IOP_CT_F_2CT -> "ADD", opcode::ADD],
    [IOP_CT_F_2CT -> "SUB", opcode::SUB],
    [IOP_CT_F_2CT -> "MUL", opcode::MUL],
    // Ct x Ct: bitwise
    [IOP_CT_F_2CT -> "BW_AND", opcode::BW_AND],
    [IOP_CT_F_2CT -> "BW_OR", opcode::BW_OR],
    [IOP_CT_F_2CT -> "BW_XOR", opcode::BW_XOR],
    // Ct x Ct: comparison
    [IOP_CMP -> "CMP_GT", opcode::CMP_GT],
    [IOP_CMP -> "CMP_GTE", opcode::CMP_GTE],
    [IOP_CMP -> "CMP_LT", opcode::CMP_LT],
    [IOP_CMP -> "CMP_LTE", opcode::CMP_LTE],
    [IOP_CMP -> "CMP_EQ", opcode::CMP_EQ],
    [IOP_CMP -> "CMP_NEQ", opcode::CMP_NEQ],
    // Ternary
    [IOP_CT_F_CT_BOOL -> "IF_THEN_ZERO", opcode::IF_THEN_ZERO],
    [IOP_CT_F_2CT_BOOL -> "IF_THEN_ELSE", opcode::IF_THEN_ELSE],
    // Custom algorithm
    [IOP_2CT_F_3CT -> "ERC_20", opcode::ERC_20],
    // Utility
    [IOP_CT_F_CT -> "MEMCPY", opcode::MEMCPY],
);
|
||||
61
backends/tfhe-hpu-backend/src/asm/iop/opcode.rs
Normal file
61
backends/tfhe-hpu-backend/src/asm/iop/opcode.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
//!
|
||||
//! Define the hex encoding for a subset of known IOps.
//! NB: Known IOps are allocated from the highest IOpcode downwards to reduce the likelihood of
//! clashing with user custom operations on extensions.
|
||||
//!
|
||||
//! Current Opcode space could be viewed as follow:
|
||||
//! | Range | Categories |
|
||||
//! | ---------- | ------------------------- |
|
||||
//! | 0x00.. 0x7f| User custom operations |
|
||||
//! | 0x80.. 0xff| Fw generated operations |
|
||||
//! | 0b1xyz_0000| x: Ct x Ct Operation |
|
||||
//! | | !x: Ct x Imm Operation |
|
||||
//! | | y!z: ARITH operations |
|
||||
//! | | !yz: BW operations |
|
||||
//! | | !y!z: CMP operations |
|
||||
//! | ---------- | ------------------------- |
|
||||
|
||||
/// Lowest opcode of the user custom range.
pub const USER_RANGE_LB: u8 = 0x0;
/// Highest opcode of the user custom range.
pub const USER_RANGE_UB: u8 = 0x7f;

// Ct x Imm -------------------------------------------------------------------
pub const ADDS: u8 = 0xA0;
pub const SUBS: u8 = 0xA1;
pub const SSUB: u8 = 0xA2;
pub const MULS: u8 = 0xA3;

// Ct x Ct --------------------------------------------------------------------
// Arithmetic operations
pub const ADD: u8 = 0xE0;
pub const SUB: u8 = 0xE2;
pub const MUL: u8 = 0xE4;

// Bitwise operations
pub const BW_AND: u8 = 0xD0;
pub const BW_OR: u8 = 0xD1;
pub const BW_XOR: u8 = 0xD2;

// Comparison operations
pub const CMP_GT: u8 = 0xC0;
pub const CMP_GTE: u8 = 0xC1;
pub const CMP_LT: u8 = 0xC2;
pub const CMP_LTE: u8 = 0xC3;
pub const CMP_EQ: u8 = 0xC4;
pub const CMP_NEQ: u8 = 0xC5;

// Ternary operations ---------------------------------------------------------
/// IfThenZero: select the input or force it to 0.
/// Takes 1 Ct and a boolean Ct as input.
pub const IF_THEN_ZERO: u8 = 0xCA;
/// IfThenElse: select operation.
/// Takes 2 Ct and a boolean Ct as input.
pub const IF_THEN_ELSE: u8 = 0xCB;

// Custom algorithm -----------------------------------------------------------
/// ERC20 transfer algorithm (presumably a fund transfer): 2Ct <- func(3Ct).
pub const ERC_20: u8 = 0x80;

// Utility operations ---------------------------------------------------------
/// Real clone of a ciphertext already uploaded in the Hpu memory.
pub const MEMCPY: u8 = 0xFF;
|
||||
334
backends/tfhe-hpu-backend/src/asm/mod.rs
Normal file
334
backends/tfhe-hpu-backend/src/asm/mod.rs
Normal file
@@ -0,0 +1,334 @@
|
||||
pub mod dop;
|
||||
pub use dop::arg::Arg as DOpArg;
|
||||
pub use dop::{DOp, DigitParameters, ImmId, MemId, Pbs, PbsGid, PbsLut, RegId, ToHex};
|
||||
pub mod iop;
|
||||
pub use iop::{AsmIOpcode, IOp, IOpProto, IOpcode, OperandKind};
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::io::{BufRead, Write};
|
||||
|
||||
/// Characters that introduce a comment line in ASM/HEX files.
/// The first one is used when comments are written back.
pub const ASM_COMMENT_PREFIX: [char; 2] = [';', '#'];
|
||||
|
||||
// Common type used in both DOp/IOp definition --------------------------------
|
||||
/// Ciphertext Id
|
||||
/// On-board memory is viewed as an array of ciphertext,
|
||||
/// Thus, instead of using bytes address, ct id is used
|
||||
/// => Id of the first ciphertext of the vector
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
|
||||
pub struct CtId(pub u16);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Simple test for Asm parsing
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// One entry of a program: either a comment or an operation.
///
/// Keeping both in a single type preserves their interleaving while parsing.
#[derive(Debug, Clone)]
pub enum AsmOp<Op> {
    /// Comment text, without its prefix character.
    Comment(String),
    /// An operation.
    Stmt(Op),
}
|
||||
|
||||
impl<Op: dop::arg::ToFlush> AsmOp<Op> {
|
||||
pub fn to_flush(&mut self) {
|
||||
if let AsmOp::Stmt(op) = self {
|
||||
*op = op.to_flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Op: std::fmt::Display> std::fmt::Display for AsmOp<Op> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Comment(c) => write!(f, "{}{c}", ASM_COMMENT_PREFIX[0]),
|
||||
Self::Stmt(op) => write!(f, "{op}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generic struct to represent sequence of operations
|
||||
/// Used to extract OP from ASM file
|
||||
/// Work on any kind of Op that implement FromStr
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Program<Op>(Vec<AsmOp<Op>>);
|
||||
|
||||
impl<Op> Default for Program<Op> {
|
||||
fn default() -> Self {
|
||||
Self(Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
impl<Op> Program<Op> {
|
||||
pub fn new(ops: Vec<AsmOp<Op>>) -> Self {
|
||||
Self(ops)
|
||||
}
|
||||
/// Push a new statement in the program
|
||||
pub fn push_stmt(&mut self, op: Op) {
|
||||
self.0.push(AsmOp::Stmt(op))
|
||||
}
|
||||
/// Push a new statement in the program
|
||||
/// Returns the position in which the statement was inserted
|
||||
pub fn push_stmt_pos(&mut self, op: Op) -> usize {
|
||||
let ret = self.0.len();
|
||||
self.0.push(AsmOp::Stmt(op));
|
||||
ret
|
||||
}
|
||||
/// Push a new comment in the program
|
||||
pub fn push_comment(&mut self, comment: String) {
|
||||
self.0.push(AsmOp::Comment(comment))
|
||||
}
|
||||
|
||||
pub fn get_stmt_mut(&mut self, i: usize) -> &mut AsmOp<Op> {
|
||||
&mut self.0[i]
|
||||
}
|
||||
}
|
||||
|
||||
impl<Op> std::ops::Deref for Program<Op> {
|
||||
type Target = Vec<AsmOp<Op>>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<Op: std::fmt::Display> std::fmt::Display for Program<Op> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
for op in self.0.iter() {
|
||||
writeln!(f, "{op}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<Op, Err> Program<Op>
|
||||
where
|
||||
Op: std::str::FromStr<Err = Err>,
|
||||
Err: std::error::Error,
|
||||
{
|
||||
/// Generic function to extract OP from ASM file
|
||||
/// Work on any kind of Op that implement FromStr
|
||||
pub fn read_asm(file: &str) -> Result<Self, anyhow::Error> {
|
||||
// Open file
|
||||
let rd_f = std::io::BufReader::new(
|
||||
std::fs::OpenOptions::new()
|
||||
.create(false)
|
||||
.read(true)
|
||||
.open(file)?,
|
||||
);
|
||||
|
||||
let mut asm_ops = Vec::new();
|
||||
for (line, val) in rd_f.lines().map_while(Result::ok).enumerate() {
|
||||
if let Some(comment) = val.trim().strip_prefix(ASM_COMMENT_PREFIX) {
|
||||
asm_ops.push(AsmOp::Comment(comment.to_string()))
|
||||
} else if !val.is_empty() {
|
||||
match Op::from_str(&val) {
|
||||
Ok(op) => asm_ops.push(AsmOp::Stmt(op)),
|
||||
Err(err) => {
|
||||
tracing::warn!("ReadAsm failed @{file}:{}", line + 1);
|
||||
anyhow::bail!(
|
||||
"ReadAsm failed @{file}:{} with {}",
|
||||
line + 1,
|
||||
err.to_string()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Self(asm_ops))
|
||||
}
|
||||
}
|
||||
|
||||
impl<Op> Program<Op>
|
||||
where
|
||||
Op: std::fmt::Display,
|
||||
{
|
||||
/// Generic function to write Op in ASM file
|
||||
/// Work on any kind of Op that implement Display
|
||||
pub fn write_asm(&self, file: &str) -> Result<(), anyhow::Error> {
|
||||
// Create path
|
||||
let path = std::path::Path::new(file);
|
||||
if let Some(dir_p) = path.parent() {
|
||||
std::fs::create_dir_all(dir_p).unwrap();
|
||||
}
|
||||
|
||||
// Open file
|
||||
let mut wr_f = std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(path)?;
|
||||
|
||||
writeln!(wr_f, "{self}").map_err(anyhow::Error::new)
|
||||
}
|
||||
}
|
||||
|
||||
// Implement dedicated hex parser/dumper for DOp
|
||||
impl Program<dop::DOp> {
|
||||
/// Generic function to extract OP from hex file
|
||||
/// Work on any kind of Op that implement FromStr
|
||||
pub fn read_hex(file: &str) -> Result<Self, anyhow::Error> {
|
||||
// Open file
|
||||
let rd_f = std::io::BufReader::new(
|
||||
std::fs::OpenOptions::new()
|
||||
.create(false)
|
||||
.read(true)
|
||||
.open(file)
|
||||
.unwrap_or_else(|_| panic!("Invalid HEX file {file}")),
|
||||
);
|
||||
|
||||
let mut prog = Self::default();
|
||||
for (line, val) in rd_f.lines().map_while(Result::ok).enumerate() {
|
||||
if let Some(comment) = val.trim().strip_prefix(ASM_COMMENT_PREFIX) {
|
||||
prog.push_comment(comment.to_string());
|
||||
} else {
|
||||
let val_u32 =
|
||||
dop::DOpRepr::from_str_radix(std::str::from_utf8(val.as_bytes()).unwrap(), 16)?;
|
||||
match dop::DOp::from_hex(val_u32) {
|
||||
Ok(op) => prog.push_stmt(op),
|
||||
Err(err) => {
|
||||
tracing::warn!("DOp::ReadHex failed @{file}:{}", line + 1);
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(prog)
|
||||
}
|
||||
|
||||
/// Generic function to write Op in Hex file
|
||||
pub fn write_hex(&self, file: &str) -> Result<(), anyhow::Error> {
|
||||
// Create path
|
||||
let path = std::path::Path::new(file);
|
||||
if let Some(dir_p) = path.parent() {
|
||||
std::fs::create_dir_all(dir_p).unwrap();
|
||||
}
|
||||
|
||||
// Open file
|
||||
let mut wr_f = std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(path)?;
|
||||
|
||||
for op in self.0.iter() {
|
||||
match op {
|
||||
AsmOp::Comment(comment) => writeln!(wr_f, "{}{}", ASM_COMMENT_PREFIX[0], comment)?,
|
||||
AsmOp::Stmt(op) => writeln!(wr_f, "{:x}", op.to_hex())?,
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Program<dop::DOp> {
|
||||
/// Convert a program of Dops in translation table
|
||||
pub fn tr_table(&self) -> Vec<dop::DOpRepr> {
|
||||
let ops_stream = self
|
||||
.iter()
|
||||
.filter_map(|op| match op {
|
||||
AsmOp::Comment(_) => None,
|
||||
AsmOp::Stmt(op) => Some(op),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut words_stream = Vec::with_capacity(ops_stream.len() + 1);
|
||||
// First word of the stream is length in DOp
|
||||
words_stream.push(ops_stream.len() as u32);
|
||||
|
||||
ops_stream.iter().for_each(|op| {
|
||||
words_stream.push(op.to_hex());
|
||||
});
|
||||
words_stream
|
||||
}
|
||||
}
|
||||
|
||||
// Implement dedicated hex parser/dumper for IOp
|
||||
impl Program<iop::IOp> {
|
||||
/// Generic function to extract OP from hex file
|
||||
pub fn read_hex(file: &str) -> Result<Self, anyhow::Error> {
|
||||
// Open file
|
||||
let rd_f = std::io::BufReader::new(
|
||||
std::fs::OpenOptions::new()
|
||||
.create(false)
|
||||
.read(true)
|
||||
.open(file)
|
||||
.unwrap_or_else(|_| panic!("Invalid HEX file {file}")),
|
||||
);
|
||||
|
||||
let mut prog = Self::default();
|
||||
// Buffer word stream.
|
||||
// When comment token occurred, convert the word stream into IOp
|
||||
// -> No comment could be inserted in a middle of IOp word stream
|
||||
let mut word_stream = VecDeque::new();
|
||||
let mut file_len = 0;
|
||||
|
||||
for val in rd_f.lines().map_while(Result::ok) {
|
||||
file_len += 1;
|
||||
if let Some(comment) = val.trim().strip_prefix(ASM_COMMENT_PREFIX) {
|
||||
while !word_stream.is_empty() {
|
||||
match iop::IOp::from_words(&mut word_stream) {
|
||||
Ok(op) => prog.push_stmt(op),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
"IOp::ReadHex failed @{file}:{}",
|
||||
file_len - word_stream.len()
|
||||
);
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
prog.push_comment(comment.to_string());
|
||||
} else {
|
||||
let word = iop::IOpWordRepr::from_str_radix(
|
||||
std::str::from_utf8(val.as_bytes()).unwrap(),
|
||||
16,
|
||||
)?;
|
||||
word_stream.push_back(word);
|
||||
}
|
||||
}
|
||||
// Flush word stream
|
||||
while !word_stream.is_empty() {
|
||||
match iop::IOp::from_words(&mut word_stream) {
|
||||
Ok(op) => prog.push_stmt(op),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
"IOp::ReadHex failed @{file}:{}",
|
||||
file_len - word_stream.len()
|
||||
);
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(prog)
|
||||
}
|
||||
|
||||
/// Generic function to write Op in Hex file
|
||||
pub fn write_hex(&self, file: &str) -> Result<(), anyhow::Error> {
|
||||
// Create path
|
||||
let path = std::path::Path::new(file);
|
||||
if let Some(dir_p) = path.parent() {
|
||||
std::fs::create_dir_all(dir_p).unwrap();
|
||||
}
|
||||
|
||||
// Open file
|
||||
let mut wr_f = std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(path)?;
|
||||
|
||||
for op in self.0.iter() {
|
||||
match op {
|
||||
AsmOp::Comment(comment) => writeln!(wr_f, "{}{}", ASM_COMMENT_PREFIX[0], comment)?,
|
||||
AsmOp::Stmt(op) => {
|
||||
op.to_words()
|
||||
.into_iter()
|
||||
.try_for_each(|word| writeln!(wr_f, "{word:0>8x}"))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user