Merge pull request #49 from itzmeanjan/conform-to-nist-fips-203-ipd

Conform to NIST FIPS-203 Draft Standard for ML-KEM
This commit is contained in:
Anjan Roy
2024-06-18 22:03:47 +04:00
committed by GitHub
54 changed files with 4073 additions and 4206 deletions

View File

@@ -1,4 +1,4 @@
name: Test Kyber Key Encapsulation Mechanism
name: Test Ml_kem Key Encapsulation Mechanism
on:
push:

View File

@@ -14,7 +14,7 @@ DEP_IFLAGS = -I $(SHA3_INC_DIR) -I $(SUBTLE_INC_DIR)
DUDECT_DEP_IFLAGS = $(DEP_IFLAGS) -I $(DUDECT_INC_DIR)
SRC_DIR = include
KYBER_SOURCES := $(wildcard $(SRC_DIR)/*.hpp)
ML_KEM_SOURCES := $(shell find $(SRC_DIR) -name '*.hpp')
BUILD_DIR = build
DUDECT_BUILD_DIR = $(BUILD_DIR)/dudect
ASAN_BUILD_DIR = $(BUILD_DIR)/asan
@@ -23,6 +23,7 @@ UBSAN_BUILD_DIR = $(BUILD_DIR)/ubsan
TEST_DIR = tests
DUDECT_TEST_DIR = $(TEST_DIR)/dudect
TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp)
TEST_HEADERS := $(wildcard $(TEST_DIR)/*.hpp)
DUDECT_TEST_SOURCES := $(wildcard $(DUDECT_TEST_DIR)/*.cpp)
TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES))))
ASAN_TEST_OBJECTS := $(addprefix $(ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES))))
@@ -58,16 +59,16 @@ $(BUILD_DIR):
mkdir -p $@
$(SHA3_INC_DIR):
git submodule update --init
git submodule update --init sha3
$(DUDECT_INC_DIR): $(SHA3_INC_DIR)
git submodule update --init
git submodule update --init dudect
$(SUBTLE_INC_DIR): $(DUDECT_INC_DIR)
git submodule update --init
git submodule update --init subtle
$(GTEST_PARALLEL): $(SUBTLE_INC_DIR)
git submodule update --init
git submodule update --init gtest-parallel
$(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR)
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
@@ -123,5 +124,5 @@ perf: $(PERF_BINARY)
clean:
rm -rf $(BUILD_DIR)
format: $(KYBER_SOURCES) $(TEST_SOURCES) $(DUDECT_TEST_SOURCES) $(BENCHMARK_SOURCES) $(BENCHMARK_HEADERS)
format: $(ML_KEM_SOURCES) $(TEST_SOURCES) $(TEST_HEADERS) $(DUDECT_TEST_SOURCES) $(BENCHMARK_SOURCES) $(BENCHMARK_HEADERS)
clang-format -i $^

523
README.md
View File

@@ -1,27 +1,27 @@
> [!CAUTION]
> This Kyber implementation is conformant with Kyber specification https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf and I also *try* to make it timing leakage free, using **dudect** (see https://github.com/oreparaz/dudect) -based tests, but be informed that this implementation is not yet audited. *If you consider using it in production, be careful !*
> This ML-KEM implementation is conformant with ML-KEM draft standard https://doi.org/10.6028/NIST.FIPS.203.ipd and I also *try* to make it timing leakage free, using **dudect** (see https://github.com/oreparaz/dudect) -based tests, but be informed that this implementation is not yet audited. *If you consider using it in production, please be careful !*
# kyber
CRYSTALS-Kyber: Post-Quantum Public-key Encryption &amp; Key-establishment Algorithm
# ML-KEM (formerly known as Kyber)
Module-Lattice-Based Key-Encapsulation Mechanism Standard by NIST.
## Motivation
Kyber is being standardized by NIST as post-quantum secure key encapsulation mechanism (KEM), which can be used for key establishment.
ML-KEM is being standardized by NIST as a post-quantum secure key encapsulation mechanism (KEM), which can be used for key establishment between two parties communicating over an insecure channel.
Kyber offers an *IND-CCA2-secure* Key Encapsulation Mechanism - its security is based on the hardness of solving the learning-with-errors (LWE) problem in module (i.e. structured) lattices.
ML-KEM offers an *IND-CCA-secure* Key Encapsulation Mechanism - its security is based on the hardness of solving the learning-with-errors (LWE) problem in module (i.e. structured) lattices.
Kyber Key Encapsulation Mechanism is built on top of *IND-CPA-secure Kyber Public Key Encryption*, where two communicating parties, both generating their key pairs, while publishing only their public keys to each other, can encrypt fixed length ( = 32 -bytes ) message using peer's public key. Cipher text can be decrypted by corresponding secret key ( which is private to the keypair owner ) and 32 -bytes message can be recovered back. Then a slightly tweaked FujisakiOkamoto (FO) transform is applied on *IND-CPA-secure Kyber PKE* - giving us the *IND-CCA2-secure KEM* construction. In KEM scheme, two parties interested in establishing a secure communication channel over public & insecure channel, can generate a shared secret key ( of arbitrary byte length ) from a key derivation function ( i.e. KDF which is SHAKE256 Xof in this context ) which is obtained by both of these parties as result of seeding SHAKE256 Xof with same secret. This secret is 32 -bytes and that's what is communicated by sender to receiver using underlying Kyber PKE scheme.
ML-KEM is built on top of *IND-CPA-secure K-PKE*, where two communicating parties, both generating their key pairs, while publishing only their public keys to each other, can encrypt fixed length ( = 32 -bytes ) message using peer's public key. Cipher text can be decrypted by corresponding secret key ( which is private to the keypair owner ) and 32 -bytes message can be recovered back. Then a slightly tweaked Fujisaki-Okamoto (FO) transform is applied on *IND-CPA-secure K-PKE* - giving us the *IND-CCA-secure ML-KEM* construction. In KEM scheme, two parties interested in establishing a secure communication channel, over public & insecure channel, can generate a 32 -bytes shared secret key. Now they can use this 32 -bytes shared secret key in any symmetric key primitive, either for encrypting their communication (in much faster way) or deriving new/ longer keys.
Algorithm | Input | Output
--- | :-: | --:
KEM KeyGen | - | Public Key and Secret Key
Encapsulation | Public Key | Cipher Text and SHAKE256 KDF
Decapsulation | Secret Key and Cipher Text | SHAKE256 KDF
KeyGen | - | Public Key and Secret Key
Encapsulation | Public Key | Cipher Text and 32B Shared Secret
Decapsulation | Secret Key and Cipher Text | 32B Shared Secret
Here I'm maintaining `kyber` - a header-only and easy-to-use ( see more in [usage](#usage) ) C++ library implementing Kyber KEM, supporting Kyber-{512, 768, 1024} parameter sets, as defined in table 1 of Kyber specification. `sha3`, `subtle` and `dudect` (for timing leakage tests) are dependencies of this library, which are pinned to specific git commits, using git submodule.
Here I'm maintaining `kyber` - a C++20 header-only `constexpr` library, implementing ML-KEM, supporting ML-KEM-{512, 768, 1024} parameter sets, as defined in table 2 of ML-KEM draft standard. It's pretty easy to use, see [usage](#usage).
> [!NOTE]
> Find Kyber specification https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf - this is the document that I followed when implementing Kyber. I suggest you go through the specification to get an in-depth understanding of Kyber PQC suite.
> Find ML-KEM draft standard @ https://doi.org/10.6028/NIST.FIPS.203.ipd - this is the document that I followed when implementing ML-KEM. I suggest you go through the specification to get an in-depth understanding of the scheme.
## Prerequisites
@@ -29,13 +29,10 @@ Here I'm maintaining `kyber` - a header-only and easy-to-use ( see more in [usag
```bash
$ clang++ --version
Ubuntu clang version 17.0.2 (1~exp1ubuntu2.1)
Ubuntu clang version 17.0.6 (9ubuntu1)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
$ g++ --version
g++ (Ubuntu 13.2.0-4ubuntu3) 13.2.0
```
- Build tools such as `make`, `cmake`.
@@ -48,18 +45,18 @@ $ cmake --version
cmake version 3.25.1
```
- For testing Kyber KEM implementation, you need to globally install `google-test` library and headers. Follow [this](https://github.com/google/googletest/tree/main/googletest#standalone-cmake-project) guide, if you don't have it installed.
- For benchmarking Kyber KEM implementation, targeting CPU systems, you'll need to have `google-benchmark` header and library globally installed. I found guide @ https://github.com/google/benchmark#installation helpful.
- For testing ML-KEM implementation, you need to globally install `google-test` library and headers. Follow guide @ https://github.com/google/googletest/tree/main/googletest#standalone-cmake-project, if you don't have it installed.
- For benchmarking ML-KEM implementation, you'll need to have `google-benchmark` header and library globally installed. I found guide @ https://github.com/google/benchmark#installation helpful.
> [!NOTE]
> If you are on a machine running GNU/Linux kernel and you want to obtain *CPU cycle* count for KEM routines, you should consider building `google-benchmark` library with `libPFM` support, following https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7, a step-by-step guide. Find more about libPFM @ https://perfmon2.sourceforge.net.
> If you are on a machine running GNU/Linux kernel and you want to obtain *CPU cycle* count for ML-KEM routines, you should consider building `google-benchmark` library with `libPFM` support, following https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7, a step-by-step guide. Find more about libPFM @ https://perfmon2.sourceforge.net.
> [!TIP]
> Git submodule based dependencies will mostly be imported automatically, but in case that doesn't work, you can manually initialize and update them by issuing `$ git submodule update --init` from inside the root of this repository.
> Git submodule based dependencies will normally be imported automatically, but in case that doesn't work, you can manually initialize and update them by issuing `$ git submodule update --init` from inside the root of this repository.
## Testing
For testing functional correctness and conformance with Kyber specification, you have to issue
For testing functional correctness of this implementation and conformance with ML-KEM draft standard, you have to issue
> [!NOTE]
> Known Answer Test (KAT) files living in [this](./kats/) directory are generated by following (reproducible) steps, described in https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710.
@@ -71,24 +68,22 @@ make ubsan_test -j # Run tests with UndefinedBehaviourSanitizer enabled
```
```bash
[10/10] KyberKEM.ArithmeticOverZq (149 ms)
PASSED TESTS (10/10):
1 ms: build/test.out KyberKEM.NumberTheoreticTransform
1 ms: build/test.out KyberKEM.PolynomialSerialization
1 ms: build/test.out KyberKEM.Kyber768KeygenEncapsDecaps
2 ms: build/test.out KyberKEM.Kyber512KeygenEncapsDecaps
2 ms: build/test.out KyberKEM.Kyber1024KeygenEncapsDecaps
16 ms: build/test.out KyberKEM.Kyber512KnownAnswerTests
21 ms: build/test.out KyberKEM.Kyber1024KnownAnswerTests
22 ms: build/test.out KyberKEM.Kyber768KnownAnswerTests
99 ms: build/test.out KyberKEM.CompressDecompressZq
149 ms: build/test.out KyberKEM.ArithmeticOverZq
PASSED TESTS (9/9):
3 ms: build/test.out ML_KEM.ML_KEM_512_KeygenEncapsDecaps
3 ms: build/test.out ML_KEM.PolynomialSerialization
4 ms: build/test.out ML_KEM.ML_KEM_768_KeygenEncapsDecaps
4 ms: build/test.out ML_KEM.ML_KEM_1024_KeygenEncapsDecaps
41 ms: build/test.out ML_KEM.ML_KEM_512_KnownAnswerTests
63 ms: build/test.out ML_KEM.ML_KEM_1024_KnownAnswerTests
64 ms: build/test.out ML_KEM.ML_KEM_768_KnownAnswerTests
226 ms: build/test.out ML_KEM.CompressDecompressZq
284 ms: build/test.out ML_KEM.ArithmeticOverZq
```
In case you're interested in running timing leakage tests using `dudect`, execute following
> [!NOTE]
> `dudect` is integrated into this library implementation of Kyber KEM to find any sort of timing leakages. It checks for constant-timeness of all *vital* functions including Fujisaki-Okamoto transform, used in decapsulation step. It doesn't check constant-timeness of function which samples public matrix `A`, because that fails the check anyway, due to use of uniform rejection sampling. As matrix `A` is public, it's not critical that it must be *strictly* constant-time.
> `dudect` is integrated into this library implementation of ML-KEM to find any sort of timing leakages. It checks for constant-timeness of all *vital* functions including Fujisaki-Okamoto transform, used in decapsulation step. It doesn't check constant-timeness of function which samples public matrix `A`, because that fails the check anyway, due to use of uniform rejection sampling. As matrix `A` is public, it's not critical that it must be *strictly* constant-time.
```bash
# Can only be built and run on x86_64 machine.
@@ -98,9 +93,9 @@ make dudect_test_build -j
# Before running the constant-time tests, it's a good idea to put all CPU cores on "performance" mode.
# You may find guide @ https://github.com/google/benchmark/blob/main/docs/reducing_variance.md helpful.
timeout 10m taskset -c 0 ./build/dudect/test_kyber512_kem.out
timeout 10m taskset -c 0 ./build/dudect/test_kyber768_kem.out
timeout 10m taskset -c 0 ./build/dudect/test_kyber1024_kem.out
timeout 10m taskset -c 0 ./build/dudect/test_ml_kem_512.out
timeout 10m taskset -c 0 ./build/dudect/test_ml_kem_768.out
timeout 10m taskset -c 0 ./build/dudect/test_ml_kem_1024.out
```
> [!TIP]
@@ -126,257 +121,254 @@ meas: 59.97 M, max t: +2.64, max tau: 3.41e-04, (5/tau)^2: 2.14e+08. For the
## Benchmarking
For benchmarking Kyber KEM routines ( i.e. keygen, encaps and decaps ) for various suggested parameter sets, you have to issue.
For benchmarking ML-KEM public functions such as keygen, encaps and decaps, for various suggested parameter sets, you have to issue.
```bash
make benchmark # If you haven't built google-benchmark library with libPFM support.
make perf # If you have built google-benchmark library with libPFM support.
make benchmark -j # If you haven't built google-benchmark library with libPFM support.
make perf -j # If you have built google-benchmark library with libPFM support.
```
> [!NOTE]
> Benchmarking expects presence of `google-benchmark` header and library in global namespace ( so that it can be found by the compiler ).
> [!CAUTION]
> When benchmarking, ensure that you've disabled CPU frequency scaling, by following guide @ https://github.com/google/benchmark/blob/main/docs/reducing_variance.md.
> [!NOTE]
> `make perf` - was issued when collecting following benchmarks. Notice, *cycles* column, denoting cost of executing Kyber KEM routines in terms of CPU cycles. Follow [this](https://github.com/google/benchmark/blob/main/docs/perf_counters.md) for more details.
> `make perf` was issued when collecting the following benchmarks. Notice the *cycles* column, denoting the cost of executing ML-KEM functions in terms of CPU cycles. See https://github.com/google/benchmark/blob/main/docs/perf_counters.md for more details.
### On 12th Gen Intel(R) Core(TM) i7-1260P
Compiled with **gcc version 13.2.0 (Ubuntu 13.2.0-4ubuntu3)**.
Compiled with **gcc (Ubuntu 14-20240412-0ubuntu1) 14.0.1 20240412**.
```bash
$ uname -srm
Linux 6.5.0-14-generic x86_64
Linux 6.8.0-35-generic x86_64
```
```bash
2024-01-22T19:09:06+04:00
2024-06-18T21:12:04+04:00
Running ./build/perf.out
Run on (16 X 752.14 MHz CPU s)
Run on (16 X 842.086 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 1280 KiB (x8)
L3 Unified 18432 KiB (x1)
Load Average: 1.35, 0.74, 0.64
---------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations CYCLES items_per_second rdtsc
---------------------------------------------------------------------------------------------------------
kyber512/keygen_mean 14.1 us 14.1 us 10 64.8786k 71.1611k/s 35.056k
kyber512/keygen_median 13.9 us 13.9 us 10 64.8328k 71.8418k/s 34.704k
kyber512/keygen_stddev 0.363 us 0.362 us 10 533.391 1.73436k/s 903.837
kyber512/keygen_cv 2.58 % 2.57 % 10 0.82% 2.44% 2.58%
kyber512/keygen_min 13.8 us 13.8 us 10 64.1864k 66.4408k/s 34.367k
kyber512/keygen_max 15.1 us 15.1 us 10 66.2011k 72.5455k/s 37.531k
kyber1024/decap_mean 47.9 us 47.9 us 10 222.332k 20.8836k/s 119.488k
kyber1024/decap_median 47.8 us 47.8 us 10 222.36k 20.909k/s 119.335k
kyber1024/decap_stddev 0.345 us 0.345 us 10 847.653 149.328/s 860.065
kyber1024/decap_cv 0.72 % 0.72 % 10 0.38% 0.72% 0.72%
kyber1024/decap_min 47.4 us 47.4 us 10 220.724k 20.529k/s 118.295k
kyber1024/decap_max 48.7 us 48.7 us 10 223.956k 21.0947k/s 121.542k
kyber768/encap_mean 28.9 us 28.9 us 10 133.838k 34.632k/s 72.0448k
kyber768/encap_median 28.8 us 28.8 us 10 133.943k 34.7766k/s 71.729k
kyber768/encap_stddev 0.389 us 0.389 us 10 424.097 455.864/s 969.721
kyber768/encap_cv 1.35 % 1.35 % 10 0.32% 1.32% 1.35%
kyber768/encap_min 28.5 us 28.5 us 10 133.171k 33.474k/s 71.097k
kyber768/encap_max 29.9 us 29.9 us 10 134.415k 35.0874k/s 74.524k
kyber512/encap_mean 17.5 us 17.5 us 10 81.3077k 56.9959k/s 43.7583k
kyber512/encap_median 17.5 us 17.5 us 10 81.3109k 57.1806k/s 43.614k
kyber512/encap_stddev 0.178 us 0.178 us 10 224.364 572.266/s 443.14
kyber512/encap_cv 1.01 % 1.01 % 10 0.28% 1.00% 1.01%
kyber512/encap_min 17.3 us 17.3 us 10 80.9421k 55.7884k/s 43.182k
kyber512/encap_max 17.9 us 17.9 us 10 81.6759k 57.7496k/s 44.702k
kyber1024/encap_mean 44.1 us 44.1 us 10 204.634k 22.6603k/s 110.119k
kyber1024/encap_median 44.0 us 44.0 us 10 204.79k 22.7169k/s 109.836k
kyber1024/encap_stddev 0.358 us 0.356 us 10 751.071 180.658/s 891.891
kyber1024/encap_cv 0.81 % 0.81 % 10 0.37% 0.80% 0.81%
kyber1024/encap_min 43.7 us 43.7 us 10 202.876k 22.2099k/s 109.114k
kyber1024/encap_max 45.0 us 45.0 us 10 205.644k 22.8667k/s 112.348k
kyber1024/keygen_mean 37.6 us 37.6 us 10 174.399k 26.5696k/s 93.9229k
kyber1024/keygen_median 37.7 us 37.7 us 10 174.662k 26.5444k/s 94.024k
kyber1024/keygen_stddev 0.417 us 0.415 us 10 1.34601k 292.441/s 1.04079k
kyber1024/keygen_cv 1.11 % 1.10 % 10 0.77% 1.10% 1.11%
kyber1024/keygen_min 36.9 us 36.9 us 10 172.239k 26.0098k/s 91.983k
kyber1024/keygen_max 38.5 us 38.4 us 10 176.088k 27.1239k/s 95.953k
kyber768/keygen_mean 23.6 us 23.6 us 10 109.11k 42.3017k/s 58.9747k
kyber768/keygen_median 23.7 us 23.7 us 10 109.577k 42.2725k/s 59.0055k
kyber768/keygen_stddev 0.310 us 0.310 us 10 786.552 554.447/s 772.922
kyber768/keygen_cv 1.31 % 1.31 % 10 0.72% 1.31% 1.31%
kyber768/keygen_min 23.2 us 23.2 us 10 108.011k 41.3191k/s 57.748k
kyber768/keygen_max 24.2 us 24.2 us 10 109.909k 43.1928k/s 60.37k
kyber512/decap_mean 19.7 us 19.7 us 10 91.4808k 50.6517k/s 49.2443k
kyber512/decap_median 19.7 us 19.7 us 10 91.4678k 50.6475k/s 49.2465k
kyber512/decap_stddev 0.186 us 0.186 us 10 554.643 475.223/s 463.271
kyber512/decap_cv 0.94 % 0.94 % 10 0.61% 0.94% 0.94%
kyber512/decap_min 19.5 us 19.5 us 10 90.7913k 49.8154k/s 48.691k
kyber512/decap_max 20.1 us 20.1 us 10 92.7485k 51.2228k/s 50.066k
kyber768/decap_mean 31.8 us 31.8 us 10 147.512k 31.4865k/s 79.2379k
kyber768/decap_median 31.7 us 31.7 us 10 147.59k 31.5118k/s 79.1735k
kyber768/decap_stddev 0.129 us 0.128 us 10 344.756 126.89/s 320.866
kyber768/decap_cv 0.41 % 0.40 % 10 0.23% 0.40% 0.40%
kyber768/decap_min 31.6 us 31.6 us 10 146.652k 31.2195k/s 78.891k
kyber768/decap_max 32.0 us 32.0 us 10 147.851k 31.6233k/s 79.914k
Load Average: 0.59, 0.65, 0.66
------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations CYCLES items_per_second
------------------------------------------------------------------------------------------------
ml_kem_1024/keygen_mean 37.7 us 37.7 us 10 168.625k 26.5586k/s
ml_kem_1024/keygen_median 37.8 us 37.8 us 10 168.466k 26.4937k/s
ml_kem_1024/keygen_stddev 0.867 us 0.856 us 10 883.281 605.108/s
ml_kem_1024/keygen_cv 2.30 % 2.27 % 10 0.52% 2.28%
ml_kem_1024/keygen_min 36.5 us 36.5 us 10 167.909k 25.8962k/s
ml_kem_1024/keygen_max 38.7 us 38.6 us 10 171.052k 27.3982k/s
ml_kem_512/decap_mean 20.4 us 20.4 us 10 92.5549k 49.0213k/s
ml_kem_512/decap_median 20.3 us 20.3 us 10 92.4039k 49.1818k/s
ml_kem_512/decap_stddev 0.258 us 0.252 us 10 577.305 600.776/s
ml_kem_512/decap_cv 1.26 % 1.23 % 10 0.62% 1.23%
ml_kem_512/decap_min 20.0 us 20.0 us 10 92.1723k 47.8732k/s
ml_kem_512/decap_max 20.9 us 20.9 us 10 94.1701k 49.888k/s
ml_kem_512/encap_mean 16.4 us 16.4 us 10 72.6916k 60.9038k/s
ml_kem_512/encap_median 16.4 us 16.4 us 10 72.6753k 60.8974k/s
ml_kem_512/encap_stddev 0.253 us 0.250 us 10 97.0585 935.823/s
ml_kem_512/encap_cv 1.54 % 1.53 % 10 0.13% 1.54%
ml_kem_512/encap_min 15.9 us 15.9 us 10 72.5484k 59.7296k/s
ml_kem_512/encap_max 16.8 us 16.7 us 10 72.8346k 62.8025k/s
ml_kem_768/decap_mean 33.0 us 33.0 us 10 148.191k 30.3166k/s
ml_kem_768/decap_median 33.1 us 33.1 us 10 148.138k 30.1903k/s
ml_kem_768/decap_stddev 0.518 us 0.509 us 10 212.758 473.277/s
ml_kem_768/decap_cv 1.57 % 1.54 % 10 0.14% 1.56%
ml_kem_768/decap_min 32.1 us 32.1 us 10 147.836k 29.7687k/s
ml_kem_768/decap_max 33.6 us 33.6 us 10 148.61k 31.1568k/s
ml_kem_512/keygen_mean 14.6 us 14.6 us 10 63.4765k 68.3813k/s
ml_kem_512/keygen_median 14.8 us 14.8 us 10 63.4589k 67.7965k/s
ml_kem_512/keygen_stddev 0.241 us 0.240 us 10 60.264 1.14394k/s
ml_kem_512/keygen_cv 1.65 % 1.64 % 10 0.09% 1.67%
ml_kem_512/keygen_min 14.1 us 14.1 us 10 63.3859k 67.5222k/s
ml_kem_512/keygen_max 14.8 us 14.8 us 10 63.5564k 71.0285k/s
ml_kem_1024/decap_mean 49.3 us 49.3 us 10 216.516k 20.2885k/s
ml_kem_1024/decap_median 49.5 us 49.4 us 10 216.383k 20.2235k/s
ml_kem_1024/decap_stddev 0.649 us 0.634 us 10 346.756 261.841/s
ml_kem_1024/decap_cv 1.32 % 1.29 % 10 0.16% 1.29%
ml_kem_1024/decap_min 48.3 us 48.3 us 10 216.031k 19.967k/s
ml_kem_1024/decap_max 50.1 us 50.1 us 10 217.187k 20.6884k/s
ml_kem_1024/encap_mean 41.8 us 41.8 us 10 183.083k 23.9532k/s
ml_kem_1024/encap_median 41.8 us 41.8 us 10 183.077k 23.9381k/s
ml_kem_1024/encap_stddev 0.563 us 0.551 us 10 218.08 315.804/s
ml_kem_1024/encap_cv 1.35 % 1.32 % 10 0.12% 1.32%
ml_kem_1024/encap_min 41.0 us 41.0 us 10 182.737k 23.5351k/s
ml_kem_1024/encap_max 42.6 us 42.5 us 10 183.483k 24.4145k/s
ml_kem_768/encap_mean 27.4 us 27.4 us 10 121.805k 36.5012k/s
ml_kem_768/encap_median 27.4 us 27.4 us 10 121.632k 36.553k/s
ml_kem_768/encap_stddev 0.692 us 0.687 us 10 644.207 909.698/s
ml_kem_768/encap_cv 2.52 % 2.50 % 10 0.53% 2.49%
ml_kem_768/encap_min 26.5 us 26.5 us 10 121.249k 35.0289k/s
ml_kem_768/encap_max 28.6 us 28.5 us 10 123.228k 37.7644k/s
ml_kem_768/keygen_mean 25.0 us 25.0 us 10 110.546k 40.0317k/s
ml_kem_768/keygen_median 25.0 us 25.0 us 10 110.151k 40.0223k/s
ml_kem_768/keygen_stddev 0.855 us 0.854 us 10 861.179 1.36001k/s
ml_kem_768/keygen_cv 3.42 % 3.41 % 10 0.78% 3.40%
ml_kem_768/keygen_min 24.1 us 24.1 us 10 109.801k 38.1413k/s
ml_kem_768/keygen_max 26.2 us 26.2 us 10 112.141k 41.5697k/s
```
### On ARM Cortex-A72 i.e. Raspberry Pi 4B
Compiled with **gcc version 13.2.0 (Ubuntu 13.2.0-4ubuntu3)**.
Compiled with **gcc (Ubuntu 13.2.0-23ubuntu4) 13.2.0**.
```bash
$ uname -srm
Linux 6.5.0-1008-raspi aarch64
Linux 6.8.0-1005-raspi aarch64
```
```bash
2024-01-22T19:22:33+04:00
Running ./build/perf.out
2024-06-18T21:49:48+04:00
Running ./build/bench.out
Run on (4 X 1800 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x4)
L1 Instruction 48 KiB (x4)
L2 Unified 1024 KiB (x1)
Load Average: 2.32, 2.85, 1.46
----------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations CYCLES items_per_second
----------------------------------------------------------------------------------------------
kyber1024/decap_mean 250 us 250 us 10 448.76k 4.00416k/s
kyber1024/decap_median 250 us 250 us 10 448.888k 4.00246k/s
kyber1024/decap_stddev 0.401 us 0.405 us 10 738.224 6.49398/s
kyber1024/decap_cv 0.16 % 0.16 % 10 0.16% 0.16%
kyber1024/decap_min 249 us 249 us 10 447.75k 3.99346k/s
kyber1024/decap_max 251 us 250 us 10 450.06k 4.01306k/s
kyber512/decap_mean 106 us 106 us 10 189.763k 9.469k/s
kyber512/decap_median 106 us 106 us 10 189.753k 9.4696k/s
kyber512/decap_stddev 0.293 us 0.291 us 10 529.974 26.0654/s
kyber512/decap_cv 0.28 % 0.28 % 10 0.28% 0.28%
kyber512/decap_min 105 us 105 us 10 189.096k 9.41547k/s
kyber512/decap_max 106 us 106 us 10 190.852k 9.50263k/s
kyber768/encap_mean 148 us 148 us 10 265.507k 6.76869k/s
kyber768/encap_median 148 us 148 us 10 265.41k 6.77083k/s
kyber768/encap_stddev 0.566 us 0.567 us 10 1.0282k 25.9589/s
kyber768/encap_cv 0.38 % 0.38 % 10 0.39% 0.38%
kyber768/encap_min 147 us 147 us 10 263.583k 6.71972k/s
kyber768/encap_max 149 us 149 us 10 267.479k 6.81811k/s
kyber512/encap_mean 90.0 us 90.0 us 10 161.649k 11.117k/s
kyber512/encap_median 90.0 us 89.9 us 10 161.581k 11.121k/s
kyber512/encap_stddev 0.345 us 0.347 us 10 626.388 42.6811/s
kyber512/encap_cv 0.38 % 0.39 % 10 0.39% 0.38%
kyber512/encap_min 89.6 us 89.6 us 10 160.933k 11.0122k/s
kyber512/encap_max 90.9 us 90.8 us 10 163.199k 11.1667k/s
kyber768/keygen_mean 119 us 119 us 10 213.516k 8.416k/s
kyber768/keygen_median 119 us 119 us 10 213.534k 8.41435k/s
kyber768/keygen_stddev 0.275 us 0.277 us 10 496.099 19.6189/s
kyber768/keygen_cv 0.23 % 0.23 % 10 0.23% 0.23%
kyber768/keygen_min 118 us 118 us 10 212.691k 8.3908k/s
kyber768/keygen_max 119 us 119 us 10 214.168k 8.44783k/s
kyber1024/keygen_mean 188 us 188 us 10 337.777k 5.3203k/s
kyber1024/keygen_median 188 us 188 us 10 337.479k 5.32517k/s
kyber1024/keygen_stddev 0.785 us 0.791 us 10 1.42498k 22.2604/s
kyber1024/keygen_cv 0.42 % 0.42 % 10 0.42% 0.42%
kyber1024/keygen_min 187 us 187 us 10 336.121k 5.26713k/s
kyber1024/keygen_max 190 us 190 us 10 341.212k 5.34588k/s
kyber512/keygen_mean 69.0 us 68.9 us 10 123.818k 14.5129k/s
kyber512/keygen_median 69.0 us 68.9 us 10 123.807k 14.5138k/s
kyber512/keygen_stddev 0.152 us 0.148 us 10 253.268 31.0736/s
kyber512/keygen_cv 0.22 % 0.21 % 10 0.20% 0.21%
kyber512/keygen_min 68.7 us 68.7 us 10 123.395k 14.4549k/s
kyber512/keygen_max 69.2 us 69.2 us 10 124.311k 14.5653k/s
kyber768/decap_mean 170 us 170 us 10 304.634k 5.89868k/s
kyber768/decap_median 170 us 169 us 10 304.463k 5.9015k/s
kyber768/decap_stddev 0.654 us 0.648 us 10 1.15668k 22.5143/s
kyber768/decap_cv 0.39 % 0.38 % 10 0.38% 0.38%
kyber768/decap_min 169 us 169 us 10 303.091k 5.86043k/s
kyber768/decap_max 171 us 171 us 10 306.634k 5.92931k/s
kyber1024/encap_mean 224 us 224 us 10 401.823k 4.47202k/s
kyber1024/encap_median 224 us 223 us 10 401.482k 4.4752k/s
kyber1024/encap_stddev 0.802 us 0.804 us 10 1.47807k 16.038/s
kyber1024/encap_cv 0.36 % 0.36 % 10 0.37% 0.36%
kyber1024/encap_min 223 us 223 us 10 400.254k 4.44088k/s
kyber1024/encap_max 225 us 225 us 10 404.723k 4.48965k/s
Load Average: 3.51, 3.90, 2.28
-------------------------------------------------------------------------------------
Benchmark Time CPU Iterations items_per_second
-------------------------------------------------------------------------------------
ml_kem_1024/decap_mean 258 us 258 us 10 3.87579k/s
ml_kem_1024/decap_median 258 us 258 us 10 3.88038k/s
ml_kem_1024/decap_stddev 0.963 us 0.959 us 10 14.346/s
ml_kem_1024/decap_cv 0.37 % 0.37 % 10 0.37%
ml_kem_1024/decap_min 257 us 257 us 10 3.84585k/s
ml_kem_1024/decap_max 260 us 260 us 10 3.89065k/s
ml_kem_768/decap_mean 174 us 174 us 10 5.7436k/s
ml_kem_768/decap_median 174 us 174 us 10 5.74181k/s
ml_kem_768/decap_stddev 0.323 us 0.324 us 10 10.6771/s
ml_kem_768/decap_cv 0.19 % 0.19 % 10 0.19%
ml_kem_768/decap_min 174 us 174 us 10 5.72691k/s
ml_kem_768/decap_max 175 us 175 us 10 5.75986k/s
ml_kem_768/keygen_mean 119 us 119 us 10 8.40489k/s
ml_kem_768/keygen_median 119 us 119 us 10 8.4065k/s
ml_kem_768/keygen_stddev 0.217 us 0.237 us 10 16.7154/s
ml_kem_768/keygen_cv 0.18 % 0.20 % 10 0.20%
ml_kem_768/keygen_min 119 us 119 us 10 8.37403k/s
ml_kem_768/keygen_max 119 us 119 us 10 8.43292k/s
ml_kem_1024/encap_mean 216 us 216 us 10 4.6302k/s
ml_kem_1024/encap_median 216 us 216 us 10 4.63436k/s
ml_kem_1024/encap_stddev 1.03 us 1.02 us 10 21.7423/s
ml_kem_1024/encap_cv 0.48 % 0.47 % 10 0.47%
ml_kem_1024/encap_min 215 us 215 us 10 4.59301k/s
ml_kem_1024/encap_max 218 us 218 us 10 4.65477k/s
ml_kem_512/decap_mean 109 us 109 us 10 9.21521k/s
ml_kem_512/decap_median 108 us 108 us 10 9.22127k/s
ml_kem_512/decap_stddev 0.248 us 0.243 us 10 20.5809/s
ml_kem_512/decap_cv 0.23 % 0.22 % 10 0.22%
ml_kem_512/decap_min 108 us 108 us 10 9.17837k/s
ml_kem_512/decap_max 109 us 109 us 10 9.24305k/s
ml_kem_768/encap_mean 140 us 140 us 10 7.12907k/s
ml_kem_768/encap_median 140 us 140 us 10 7.13583k/s
ml_kem_768/encap_stddev 0.597 us 0.596 us 10 30.1105/s
ml_kem_768/encap_cv 0.43 % 0.42 % 10 0.42%
ml_kem_768/encap_min 140 us 140 us 10 7.05566k/s
ml_kem_768/encap_max 142 us 142 us 10 7.16165k/s
ml_kem_1024/keygen_mean 188 us 188 us 10 5.32413k/s
ml_kem_1024/keygen_median 188 us 188 us 10 5.32187k/s
ml_kem_1024/keygen_stddev 0.537 us 0.534 us 10 15.1453/s
ml_kem_1024/keygen_cv 0.29 % 0.28 % 10 0.28%
ml_kem_1024/keygen_min 187 us 187 us 10 5.29511k/s
ml_kem_1024/keygen_max 189 us 189 us 10 5.34655k/s
ml_kem_512/encap_mean 83.7 us 83.7 us 10 11.9524k/s
ml_kem_512/encap_median 83.5 us 83.5 us 10 11.9776k/s
ml_kem_512/encap_stddev 0.421 us 0.420 us 10 59.8055/s
ml_kem_512/encap_cv 0.50 % 0.50 % 10 0.50%
ml_kem_512/encap_min 83.2 us 83.2 us 10 11.8419k/s
ml_kem_512/encap_max 84.4 us 84.4 us 10 12.0191k/s
ml_kem_512/keygen_mean 69.2 us 69.2 us 10 14.4436k/s
ml_kem_512/keygen_median 69.2 us 69.2 us 10 14.4496k/s
ml_kem_512/keygen_stddev 0.267 us 0.269 us 10 55.9869/s
ml_kem_512/keygen_cv 0.39 % 0.39 % 10 0.39%
ml_kem_512/keygen_min 68.9 us 68.9 us 10 14.3569k/s
ml_kem_512/keygen_max 69.7 us 69.7 us 10 14.5198k/s
```
### On Apple M1 Max
Compiled with **Apple clang version 15.0.0 (clang-1500.1.0.2.5)**.
Compiled with **Apple clang version 15.0.0 (clang-1500.3.9.4)**.
```bash
$ uname -srm
Darwin 23.2.0 arm64
Darwin 23.5.0 arm64
```
```bash
2024-01-22T19:33:49+04:00
2024-06-18T21:24:57+04:00
Running ./build/bench.out
Run on (10 X 24 MHz CPU s)
CPU Caches:
L1 Data 64 KiB
L1 Instruction 128 KiB
L2 Unified 4096 KiB (x10)
Load Average: 2.44, 2.58, 2.80
-----------------------------------------------------------------------------------
Benchmark Time CPU Iterations items_per_second
-----------------------------------------------------------------------------------
kyber768/keygen_mean 20.2 us 20.2 us 10 49.5202k/s
kyber768/keygen_median 20.2 us 20.2 us 10 49.5691k/s
kyber768/keygen_stddev 0.078 us 0.075 us 10 182.819/s
kyber768/keygen_cv 0.39 % 0.37 % 10 0.37%
kyber768/keygen_min 20.2 us 20.1 us 10 49.0094k/s
kyber768/keygen_max 20.5 us 20.4 us 10 49.6414k/s
kyber1024/encap_mean 38.4 us 38.3 us 10 26.1344k/s
kyber1024/encap_median 38.3 us 38.2 us 10 26.1544k/s
kyber1024/encap_stddev 0.130 us 0.127 us 10 86.5122/s
kyber1024/encap_cv 0.34 % 0.33 % 10 0.33%
kyber1024/encap_min 38.2 us 38.1 us 10 25.957k/s
kyber1024/encap_max 38.6 us 38.5 us 10 26.2225k/s
kyber512/keygen_mean 12.0 us 11.9 us 10 83.7302k/s
kyber512/keygen_median 12.0 us 11.9 us 10 83.7409k/s
kyber512/keygen_stddev 0.019 us 0.020 us 10 141.747/s
kyber512/keygen_cv 0.16 % 0.17 % 10 0.17%
kyber512/keygen_min 11.9 us 11.9 us 10 83.5254k/s
kyber512/keygen_max 12.0 us 12.0 us 10 83.9197k/s
kyber768/encap_mean 25.0 us 24.9 us 10 40.0959k/s
kyber768/encap_median 25.0 us 24.9 us 10 40.106k/s
kyber768/encap_stddev 0.053 us 0.056 us 10 89.5965/s
kyber768/encap_cv 0.21 % 0.22 % 10 0.22%
kyber768/encap_min 24.9 us 24.8 us 10 39.9002k/s
kyber768/encap_max 25.1 us 25.1 us 10 40.2567k/s
kyber1024/keygen_mean 32.3 us 32.2 us 10 31.0263k/s
kyber1024/keygen_median 32.3 us 32.2 us 10 31.0496k/s
kyber1024/keygen_stddev 0.100 us 0.098 us 10 94.0295/s
kyber1024/keygen_cv 0.31 % 0.31 % 10 0.30%
kyber1024/keygen_min 32.2 us 32.2 us 10 30.7662k/s
kyber1024/keygen_max 32.6 us 32.5 us 10 31.0832k/s
kyber768/decap_mean 26.2 us 26.1 us 10 38.2517k/s
kyber768/decap_median 26.2 us 26.1 us 10 38.2788k/s
kyber768/decap_stddev 0.072 us 0.071 us 10 103.849/s
kyber768/decap_cv 0.27 % 0.27 % 10 0.27%
kyber768/decap_min 26.1 us 26.1 us 10 37.9778k/s
kyber768/decap_max 26.4 us 26.3 us 10 38.3546k/s
kyber512/encap_mean 15.2 us 15.1 us 10 66.0548k/s
kyber512/encap_median 15.2 us 15.1 us 10 66.0441k/s
kyber512/encap_stddev 0.019 us 0.018 us 10 76.3748/s
kyber512/encap_cv 0.13 % 0.12 % 10 0.12%
kyber512/encap_min 15.1 us 15.1 us 10 65.9247k/s
kyber512/encap_max 15.2 us 15.2 us 10 66.1939k/s
kyber1024/decap_mean 39.7 us 39.6 us 10 25.2636k/s
kyber1024/decap_median 39.7 us 39.6 us 10 25.2559k/s
kyber1024/decap_stddev 0.052 us 0.047 us 10 30.0564/s
kyber1024/decap_cv 0.13 % 0.12 % 10 0.12%
kyber1024/decap_min 39.6 us 39.5 us 10 25.2259k/s
kyber1024/decap_max 39.8 us 39.6 us 10 25.3094k/s
kyber512/decap_mean 16.1 us 16.1 us 10 62.1168k/s
kyber512/decap_median 16.1 us 16.1 us 10 62.1323k/s
kyber512/decap_stddev 0.023 us 0.024 us 10 93.9076/s
kyber512/decap_cv 0.14 % 0.15 % 10 0.15%
kyber512/decap_min 16.1 us 16.1 us 10 61.9199k/s
kyber512/decap_max 16.2 us 16.1 us 10 62.2184k/s
Load Average: 2.12, 4.39, 7.54
-------------------------------------------------------------------------------------
Benchmark Time CPU Iterations items_per_second
-------------------------------------------------------------------------------------
ml_kem_768/keygen_mean 20.7 us 20.7 us 10 48.4041k/s
ml_kem_768/keygen_median 20.7 us 20.7 us 10 48.4089k/s
ml_kem_768/keygen_stddev 0.031 us 0.029 us 10 68.1992/s
ml_kem_768/keygen_cv 0.15 % 0.14 % 10 0.14%
ml_kem_768/keygen_min 20.6 us 20.6 us 10 48.2768k/s
ml_kem_768/keygen_max 20.7 us 20.7 us 10 48.5023k/s
ml_kem_1024/keygen_mean 32.5 us 32.5 us 10 30.8076k/s
ml_kem_1024/keygen_median 32.4 us 32.4 us 10 30.8861k/s
ml_kem_1024/keygen_stddev 0.159 us 0.161 us 10 152.372/s
ml_kem_1024/keygen_cv 0.49 % 0.50 % 10 0.49%
ml_kem_1024/keygen_min 32.4 us 32.3 us 10 30.5386k/s
ml_kem_1024/keygen_max 32.8 us 32.7 us 10 30.9448k/s
ml_kem_768/encap_mean 22.7 us 22.7 us 10 44.144k/s
ml_kem_768/encap_median 22.7 us 22.7 us 10 44.1494k/s
ml_kem_768/encap_stddev 0.037 us 0.037 us 10 72.779/s
ml_kem_768/encap_cv 0.16 % 0.16 % 10 0.16%
ml_kem_768/encap_min 22.6 us 22.6 us 10 43.9993k/s
ml_kem_768/encap_max 22.8 us 22.7 us 10 44.26k/s
ml_kem_768/decap_mean 26.7 us 26.6 us 10 37.5449k/s
ml_kem_768/decap_median 26.6 us 26.6 us 10 37.5935k/s
ml_kem_768/decap_stddev 0.108 us 0.098 us 10 137.284/s
ml_kem_768/decap_cv 0.40 % 0.37 % 10 0.37%
ml_kem_768/decap_min 26.6 us 26.5 us 10 37.2779k/s
ml_kem_768/decap_max 26.9 us 26.8 us 10 37.6739k/s
ml_kem_512/keygen_mean 12.1 us 12.1 us 10 82.8747k/s
ml_kem_512/keygen_median 12.1 us 12.1 us 10 82.9135k/s
ml_kem_512/keygen_stddev 0.016 us 0.018 us 10 120.443/s
ml_kem_512/keygen_cv 0.13 % 0.15 % 10 0.15%
ml_kem_512/keygen_min 12.1 us 12.0 us 10 82.7218k/s
ml_kem_512/keygen_max 12.1 us 12.1 us 10 83.0684k/s
ml_kem_512/encap_mean 13.4 us 13.4 us 10 74.4965k/s
ml_kem_512/encap_median 13.4 us 13.4 us 10 74.512k/s
ml_kem_512/encap_stddev 0.016 us 0.016 us 10 88.0048/s
ml_kem_512/encap_cv 0.12 % 0.12 % 10 0.12%
ml_kem_512/encap_min 13.4 us 13.4 us 10 74.3506k/s
ml_kem_512/encap_max 13.5 us 13.4 us 10 74.6472k/s
ml_kem_1024/encap_mean 35.5 us 35.4 us 10 28.2336k/s
ml_kem_1024/encap_median 35.5 us 35.4 us 10 28.209k/s
ml_kem_1024/encap_stddev 0.133 us 0.134 us 10 106.629/s
ml_kem_1024/encap_cv 0.38 % 0.38 % 10 0.38%
ml_kem_1024/encap_min 35.3 us 35.2 us 10 28.0729k/s
ml_kem_1024/encap_max 35.6 us 35.6 us 10 28.3909k/s
ml_kem_1024/decap_mean 40.4 us 40.3 us 10 24.8064k/s
ml_kem_1024/decap_median 40.4 us 40.3 us 10 24.8086k/s
ml_kem_1024/decap_stddev 0.066 us 0.070 us 10 42.8027/s
ml_kem_1024/decap_cv 0.16 % 0.17 % 10 0.17%
ml_kem_1024/decap_min 40.3 us 40.2 us 10 24.734k/s
ml_kem_1024/decap_max 40.5 us 40.4 us 10 24.8586k/s
ml_kem_512/decap_mean 16.4 us 16.3 us 10 61.1867k/s
ml_kem_512/decap_median 16.4 us 16.3 us 10 61.1979k/s
ml_kem_512/decap_stddev 0.024 us 0.022 us 10 81.9971/s
ml_kem_512/decap_cv 0.15 % 0.13 % 10 0.13%
ml_kem_512/decap_min 16.3 us 16.3 us 10 61.0308k/s
ml_kem_512/decap_max 16.4 us 16.4 us 10 61.308k/s
```
## Usage
`kyber` is written as a header-only C++ library, majorly targeting 64 -bit platforms and it's pretty easy to get started with. All you need to do is following.
`kyber` is written as a header-only C++20 `constexpr` library, mainly targeting 64-bit desktop/server-grade platforms, and it's pretty easy to get started with. All you need to do is the following.
- Clone `kyber` repository.
@@ -389,12 +381,12 @@ git clone https://github.com/itzmeanjan/kyber.git && pushd kyber && git submodul
git clone https://github.com/itzmeanjan/kyber.git --recurse-submodules
```
- Write your program while including proper header files ( based on which variant of Kyber KEM you want to use, see [include](./include) directory ), which includes declarations ( and definitions ) of all required KEM routines and constants ( such as byte length of public/ private keys and cipher text ).
- Write your program while including proper header files ( based on which variant of ML-KEM you want to use, see [include](./include) directory ), which includes declarations ( and definitions ) of all required ML-KEM routines and constants ( such as byte length of public/ private key, cipher text etc. ).
```cpp
// main.cpp
#include "kyber512_kem.hpp"
#include "ml_kem/ml_kem_512.hpp"
#include <algorithm>
#include <array>
#include <cassert>
@@ -402,38 +394,37 @@ git clone https://github.com/itzmeanjan/kyber.git --recurse-submodules
int
main()
{
std::array<uint8_t, 32> d{}; // seed
std::array<uint8_t, 32> z{}; // seed
std::array<uint8_t, kyber512_kem::PKEY_LEN> pkey{};
std::array<uint8_t, kyber512_kem::SKEY_LEN> skey{};
std::array<uint8_t, 32> m{}; // seed
std::array<uint8_t, kyber512_kem::CIPHER_LEN> cipher{};
std::array<uint8_t, ml_kem_512::SEED_D_BYTE_LEN> d{};
std::array<uint8_t, ml_kem_512::SEED_Z_BYTE_LEN> z{};
std::array<uint8_t, ml_kem_512::PKEY_BYTE_LEN> pkey{};
std::array<uint8_t, ml_kem_512::SKEY_BYTE_LEN> skey{};
std::array<uint8_t, ml_kem_512::SEED_M_BYTE_LEN> m{};
std::array<uint8_t, ml_kem_512::CIPHER_TEXT_BYTE_LEN> cipher{};
std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> sender_key{};
std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> receiver_key{};
// Be careful !
//
// Read API documentation in include/prng.hpp
prng::prng_t prng;
// Read API documentation in include/ml_kem/internals/rng/prng.hpp
ml_kem_prng::prng_t<128> prng;
prng.read(d);
prng.read(z);
prng.read(m);
kyber512_kem::keygen(d, z, pkey, skey);
auto skdf = kyber512_kem::encapsulate(m, pkey, cipher);
auto rkdf = kyber512_kem::decapsulate(skey, cipher);
ml_kem_512::keygen(d, z, pkey, skey);
assert(ml_kem_512::encapsulate(m, pkey, cipher, sender_key)); // Key Encapsulation might fail, if input public key is malformed
ml_kem_512::decapsulate(skey, cipher, receiver_key);
std::array<uint8_t, 32> sender_key{};
skdf.squeeze(sender_key);
std::array<uint8_t, 32> receiver_key{};
rkdf.squeeze(receiver_key);
assert(std::ranges::equal(sender_key, receiver_key));
assert(sender_key == receiver_key);
return 0;
}
```
- When compiling your program, let your compiler know where it can find `kyber`, `sha3` and `subtle` headers, which includes their definitions ( kyber being a header-only library ) too.
- When compiling your program, let your compiler know where it can find `kyber`, `sha3` and `subtle` headers, which includes their definitions ( all of them are header-only libraries ) too.
```bash
# Assuming `kyber` was cloned just under $HOME
@@ -442,35 +433,35 @@ KYBER_HEADERS=~/kyber/include
SHA3_HEADERS=~/kyber/sha3/include
SUBTLE_HEADERS=~/kyber/subtle/include
g++ -std=c++20 -Wall -O3 -march=native -I $KYBER_HEADERS -I $SHA3_HEADERS -I $SUBTLE_HEADERS main.cpp
g++ -std=c++20 -Wall -Wextra -pedantic -O3 -march=native -I $KYBER_HEADERS -I $SHA3_HEADERS -I $SUBTLE_HEADERS main.cpp
```
Kyber KEM Variant | Namespace | Header
ML-KEM Variant | Namespace | Header
:-- | :-: | --:
Kyber512 KEM Routines | `kyber512_kem::` | [include/kyber512_kem.hpp](include/kyber512_kem.hpp)
Kyber768 KEM Routines | `kyber768_kem::` | [include/kyber768_kem.hpp](include/kyber768_kem.hpp)
Kyber1024 KEM Routines | `kyber1024_kem::` | [include/kyber1024_kem.hpp](include/kyber1024_kem.hpp)
ML-KEM-512 Routines | `ml_kem_512::` | `include/ml_kem/ml_kem_512.hpp`
ML-KEM-768 Routines | `ml_kem_768::` | `include/ml_kem/ml_kem_768.hpp`
ML-KEM-1024 Routines | `ml_kem_1024::` | `include/ml_kem/ml_kem_1024.hpp`
> [!NOTE]
> Kyber parameter sets are selected from table 1 of Kyber specification https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf.
> ML-KEM parameter sets are taken from table 2 of ML-KEM draft standard @ https://doi.org/10.6028/NIST.FIPS.203.ipd.
See example [program](./examples/kyber512_kem.cpp), where I show how to use Kyber512 KEM API. You can almost similarly use Kyber768 or Kyber1024 KEM API, by just importing correct header file and using KEM functions/ constants from respective namespace.
See the example [program](./examples/ml_kem_768.cpp), where I show how to use the ML-KEM-768 API.
```bash
g++ -std=c++20 -Wall -Wextra -pedantic -O3 -march=native -I ./include -I ./sha3/include -I ./subtle/include/ examples/kyber512_kem.cpp && ./a.out
g++ -std=c++20 -Wall -Wextra -pedantic -O3 -march=native -I ./include -I ./sha3/include -I ./subtle/include/ examples/ml_kem_768.cpp && ./a.out
```
```bash
Kyber512 KEM
pubkey : 175782d35b2666833aee098617626d88dbcc47091a011882d52105acc218c9287a95276a3259a6a94aa386d8148886abdcc1841f39260ce4754ebacc1fd36102905d4c623d0b27930b4c249ee7380758c0ac5982b0e932eda95184a40f55c451d835861ca2b314dbce97829f1b92752dda592d8960b2540f464988ea1c974c63467c439b1de540490b0af0491a6507951ebc971887bd2b4a11327381d99586f10668c83abe92fb649b113da7ec666729bc1cc38a1de137dd3cc4e3a6abb9881a2ee63e7df3ad6cb680664ba1559ca17448c968b7c867ac5f324911ffd43993b8a7b8f57094c786877c1208fa7f53e51d6f1a46ae71bc81f78ebe5808d48200b7e1bc81ec3d31070a6993aa5db237eb3a4c592aa559a73bd769583a0ad095ec1669b952be4a71fe8603f5d597f007a048cc9d7fea6735383b6b8bbf896b74dc48a21840a92c497a9bc7434b0241a9e42e6428515d477c4e0b3678fab1d619b794f01b828648e7577bb2e5297915b9fdf33cb291a37de51b51c7aca6f07994193bd981134da2340c23a93cda8b68e429ac801d3748b8d112b57e388511e3305e50a51184b623607447468be94351cd0b9111a119b4b3c6f270c1cfea749a2ac89455590280c369163946481dbaeb4693dbb376202db2d8464c61aea6411cd887080f5c59e1587da01510cd1b0e8b030a5c200639ba26376134e88279891b90373cc92e7a76c0aaa33d084ab3f61e175010996652e441300ad5aefda9cc88f17fef2102b643179e0a49a60c47ce06c5b1a0b150b09ca4593e5dd48a9b1979d103ba862c43ed354d2ec99575b70e741808288aa0e1cb792c0a458d4584ddfa1870d7b797e2aac7d4cc08916015401338d8841d226d9656661cda93f53343e0f906b82bce8f25428b02a639a47f7dda5b946a3785656fb6d083df5a5ec7493cc017a2469b1f43c96f2e3bbc9d6cb07bec82d721a4cfba6ca2c59b0e01bda98585692b9da753923f830b52c843b6d963f959ad60189f42d61df7808f4d131c4d233e246c4735193e516452061701e6114cf1587a54c79105f48fdce9c2134bb60550b242945ea011ec54c570054b93d96f072426b7c9b524db8d2f136b7db2d1f38897
seckey : a598a250c2008688af8f71a285abae5b528a19479acf915cd2f92a7365bc757c670accc4b2190aa77b7d0c76355962a0ea9b6a1f4400be77797a6851776815032307913aa475b733a1ba698b2134ea25a57bd9b979e2bcb7d99f24f06ee760227486ae1cdaba79065bc3180d79a0906c514e5b973435c00f34b87e882643ef6b42bcbca4a3b65207abb5dca76e49a9be7a6013d256bc09b1211b70bb28e2151200c6c1e00082e88634600a29e3cf5ff541051c703ac373a91228a6d30491221df6749e22b21429612ed4ba07a7d7789717809e498f2e3a1b8a6a40afe0a7d2460350074a2a5127cc20c0b03446977a612a096324337cd5bc455f77cdbe4600e147b02fe58bc9c383b1e84ea3bc5755d3a87ce515b07c96741a72d9eb702d445acc3374531c70ef221216db2c9198110d83084f7b508da18fd34b8ee9f45d1204a627609d09a89c73e8bfc1f987c6bc906fac0d01720b169061b3d8015a5121a0d3beab454a03cc24a9bc5725e6c4aaf44d8f8b7242443f289c0751226448ec794a02a1ba411caeabb99f7a90510b8812a91a0ad69f1476408940381724a1dbfb7f69642788267b068c585bf41bf3f857fd14bcd9506b95b6a257d7481a07628004944e136a4c97842c13451e960cf4e08a8b6666e17a6aac016d701c1a00c82072939a092397c7104d7fd6332b860034f2ace5191e2792cf10e21f5166304bf329696128d63640b7882809b750f1f89e5d513fa08a8439e1ad5fe0affd887b0f06ab91798c35d48f39261af3dab7ddbc899be21d1f751b8d317b8e280f400b637ac6a471b4065973a6253235c94117e22083562b715ce680fafb78da9113f0f52692c52625ea8e1c24a1d8837beb963a5ab078455c8a43cbab68dc4eaa4c7646c9bb45803442250e935738944c7228aa3f7137567eb1231c63bff7552a7858525b92bdca832a41cb20fb24647a62af1da27e50c41f3cd070ed8c1d1213c22b1540a5c1412d67ab4ff334c2e5217c06a5f8a93c0637bd0fb4736c19591f67c378aa80f7c9587b346bbfe81eff8574c7e0acc3164c3df048019639a80377b97457175782d35b2666833aee098617626d88dbcc47091a011882d52105acc218c9287a95276a3259a6a94aa386d8148886abdcc1841f39260ce4754ebacc1fd36102905d4c623d0b27930b4c249ee7380758c0ac5982b0e932eda95184a40f55c451d835861ca2b314dbce97829f1b92752dda592d8960b2540f464988ea1c974c63467c439b1de540490b0af0491a6507951ebc971887bd2b4a11327381d99586f10668c83abe92fb649b113da7ec666729bc1cc38a1de137dd3cc4e3a6abb9881a2ee63e7df3ad6cb680664ba1559ca17448c968b7c867ac5f324911ffd43993b8a7b8f57
094c786877c1208fa7f53e51d6f1a46ae71bc81f78ebe5808d48200b7e1bc81ec3d31070a6993aa5db237eb3a4c592aa559a73bd769583a0ad095ec1669b952be4a71fe8603f5d597f007a048cc9d7fea6735383b6b8bbf896b74dc48a21840a92c497a9bc7434b0241a9e42e6428515d477c4e0b3678fab1d619b794f01b828648e7577bb2e5297915b9fdf33cb291a37de51b51c7aca6f07994193bd981134da2340c23a93cda8b68e429ac801d3748b8d112b57e388511e3305e50a51184b623607447468be94351cd0b9111a119b4b3c6f270c1cfea749a2ac89455590280c369163946481dbaeb4693dbb376202db2d8464c61aea6411cd887080f5c59e1587da01510cd1b0e8b030a5c200639ba26376134e88279891b90373cc92e7a76c0aaa33d084ab3f61e175010996652e441300ad5aefda9cc88f17fef2102b643179e0a49a60c47ce06c5b1a0b150b09ca4593e5dd48a9b1979d103ba862c43ed354d2ec99575b70e741808288aa0e1cb792c0a458d4584ddfa1870d7b797e2aac7d4cc08916015401338d8841d226d9656661cda93f53343e0f906b82bce8f25428b02a639a47f7dda5b946a3785656fb6d083df5a5ec7493cc017a2469b1f43c96f2e3bbc9d6cb07bec82d721a4cfba6ca2c59b0e01bda98585692b9da753923f830b52c843b6d963f959ad60189f42d61df7808f4d131c4d233e246c4735193e516452061701e6114cf1587a54c79105f48fdce9c2134bb60550b242945ea011ec54c570054b93d96f072426b7c9b524db8d2f136b7db2d1f3889778f791d583227a702cdfa4a9f95014df019495f14e02318b3704dc3794af523705be75f29753f47b2888ceef235d82caca9f983b40bf10b29672da272113a973
cipher : bcee459c896ea378dcc458a532c35c029eff6b8cf8adc83f484fb6f9bfe32612f7c936cbf4dbd7c5262288dc3966a0d769f94a0bd57913a60a71efae09321c22c53839d836cef5fb8bf5c630bd3b3d657492eabfc7e67a42a631c95391656f0fce607a181e418144dff3d97f1192a2825a94da5113bcffc2e5f3e043f7583e6159902ddd009f8bcb18046a05695917bdef48accc2e3708f8536aabb420a7fd7989c60bca6c1941af45eac2f03cf71c8506721f8cd69bd3c573f036e3e8ae72b85632d06e0cab6fa1fea078d84aa1a116ac58ee632a0542b2d0e6a7026ae814ceeb46478d1cefd082c9b19efa7bb6ddd7abda8e43eab7b5a5204449273ea056b36d3797371f855d0c7ff0436279b21b831ad0970c26cc39f8627deb932689b8df48e73b1b5893987fa4dbc65571a78287f1573beeb85db52a3edbad6f50725bcbfa40423e3ce1ab00c16ea3922bc42e6782ce224ccfb3c978d8704584b9768a8edb6a950c0208b1c1c9a6a4e0d6300a9cfe788389697460efc41308448e9752d2022dfdecd118440346e2fabb07559b76301943f3b186adaaba09828efb28db1cd4a5e82e01f360451cb3c487f371af05725ea0e7d61932a8dc38108e99182e9b50d2aa828a773a2e18f5271ac75e5a5c50b9221f893e5f7076732beb0ffb9e4b82e1c0648192c9547870372b78c6a3e3a1b00d904a4a1492d5944e0510acee62e40c78cecef97922b04807cdd47d4d403a7bb16316598e6eee760b257382d9648c9920c3395717d8ac829bd37465c0f3e7f0c7e6fc351aac802edb722200776906eb36f622c0b8702958e44317961f583265a83b8cfcd9eed80f15b9ef848ebb7355df9718a60c532e20074854797685b3e4a25f929fce9ad02a5af114f92210abd3b73fddf28f116c2d4c27ceda6428a3892eb0c18fc12b07596e4153f2a3df9aa440957704bc56bbbee06cd99def3218c046344b4c5a811840a088bcbbad76fca4a20b9bf608873b2830afd6097b05022e8b1d42af3e5e4f00303adc9f130a84cdde3fef9335ccd1120b3f2050f17ef0c10fd226268965cbfc13738ada0632
shared secret : 508ac79bf97e90d75267159ba5189b73c48ab41a91aec0f32edd6cd1e66465b5
ML-KEM-768
Pubkey : 6653a1f5242faad7b37863433dc56538957f3c412102a17d28bc328c4781c566331f8c0b77093baef24a58d6312ddc719ac67ac2874f3adc8a3e6530adbc14cc069159a99e56277895c17c04da1644db23a6e9c16f31c21959400a8abd483a3fcfc0c5fd759917322a66a2aa77a6956f3b8387443640746b0ac8a282521dd784332d56aa745898c3fcc60a56a0716931bbe69b26c4514d529c79979355c8b40eb97fe7c485ceaa45d610145b4bce7da6343db46b6bf42182931a3ed98bafb66614e024cf8c9e51a90b1fc3702a2b4fe3b0c537fa9a1680b4d2f2044c557b1819300a6225be6c234d07d06a702eeb7110ef05c8973b0cab182efbb9ba07811b87b24e2a652cb428240c53423efcaf201973bf3342e86a8d477191d3544217f143586ba351fb7729ac8a51a51c8ab719fd3568c615a7a438b8967301754cac96a8552af82d8ce8840da56cb7481ad54581904c0d390732eceb23df4483cd7593d949bb0c985042f71018862b0d126702a7b55c8c7d9d44cfea157d4013c57ceb18bcfc2c95d8bb8d6178e0ac738ffcc1b3fca525a7ae83652e0b75836fe6c77d182626a8ca85262a17bc60645105a503b2f0f707e765552d49979273b0cb5870124933c6557ef795b36bf093f6c35ef722c9b2854999d20b5fd23dc6d2381ef38bcf547e37faa8ccda3dea2409deda7992a1951849ce3e7b11f3f98cb0d2283e458af854af9d74c57516a924e74222e9bcac529e88a02913d9ae29ec3cc42269d08ca1bf13a941f95d0bb05da9ac4a1ea2bb86b4c631853ec5f2129834a70ba923c8f1bc3fd5cad8692ef4401417b362f0e729497633794abc21e95a59319403002085b113a7b0544165210c1726346a088a933347a265ba055429e637fc40b111d38446461d77546166f923e5249427e5c62092b6ee2a2b585273c3d545b99673419194e54978d71ca606e238a053988db999904207b8326c5b27a38966c4d99460386c453d12821602b444320da205c980da3ab9d3461d405601a7226c143cf4492b8bf4c63a949a8ad81224c71005abcf6afb4ba7ba94ee437079494f9d78c69ac950711765a7e50ab42ba6bf64a5a7d30e64d14fb845ab37b4cc5b099c44e3cf4f9bc61f3640b5b98560474f9f1054dce9be10db77dea35a2375c66d0a26d7f73e7385b03ca8194dbaaf601184f826bd0a86b1d023ae9548b6d4602cd25c3f46e1c66dca6fd183007d043f6f3a6f5a56089180744d579bfe3cc65f003d91084adac4c0ac5811ea8a3e5aa6500eac125a423000297a7975585a083bcfc807b7722b5a0438c1b11b62abcb9a4623f8090c451690455acf97814074c0b6d6d19180f08afbd5ba0e259ee910a61f684d14e7996e47ba5a19994a13a642a1bb563411979bc
af7b302d70ba750a89867653b93596d03b260c7c0a024949bb7b1b110dc8267d5c5305390da26c7e6296add6ba7533b92540c28b337c5b392a6024c57cc09b899ec72e1466de604aa0c909baeb0b0078324e810481f760ae694b5e88cb034bca48bf70881047c7b6ab7ce04b96ad2bd0142b387bc1824a22742c7ce18ebac7744a616a5631e40ab817426c6130ece8641661ab863c44c2adb64029990aea24c94bec0ad7bfba46cd1894775ac6549b1a63446cade59357e125c589a73
Seckey : 7fc5b38c54324b214d9db93e4069c03a097931156a9e18291ac9b0ac787dfcd189a5971b3dd79fab1b5bacbb35a8d33d08faa3493c8d26886c0f487bee757b16837799912bbd99200502227ff510a4c39d924434af8233220084888b134fb608b9d424c66b8ec5c34069327c5ac591ee0b774ef42f922aceeae21d44738b74e644c5dc0fff4c8ed2cb25f495126840442370159a7c5bb6631c4b7aa4d0445a1fa63c104b492e986e1109bcab369fa0889ca41202d22c8511b8a35719163336a87ea672798168038439ab5645acf813f773a0a575169250c2a2062618aa3b77d015750abaf6a707019945e590beaf6cc499b8305ef66583e42c24f9b2d720ac286a8b513261d36a4c34eb7a6a012488aa3e58553e5245ac7e7089d327568ec66e9bccc96fa5ce0bcc8b1fda3343857adc564d4a631e695b7ecea241947561519b48a75b4f89ab992f201d3f5c958254c9c8653ee2d08756388152db8284d0a48db1a7b96282e4b168cf16a51362534c4c90ca21c28f5b76a12c6042f61a7de52c2bf8901f3738cec81c1ac75399b63320217ce8833e06c0b022b5883415009e08428cc07570660cd14a818cc2ad1a00aeb011b9622a48dde6632277b784d28651c17438d8457e67a6dbc016e387cdabf15e3c456c17c1b3825376bdb3c3f418ac251c96605b208f35c0027559405240bc419303ea9d1c9a36c10ccc0b443782e1c1e860ad0af914ab7a53cfaa0d7ef1100be74da5b43fcfe4bc60e7397cf009ddda713eb7610a15716e9b9e7ac55269c9494bc910a3753c0a0942ec654f1500826f900bb4776dde03166ef376dd67cc385406d2f4047ffc819b2657e596c964189727f3b8a9705f4dc011fda20c73846f62146085547d872519636697e16c7e89d741c917974e490812482ab704099e003acab375f1376536b85c5c12196b2a7514461a2fc2032bb65164f6cbe16c8fb6754a3cd75bb26635a1f5b96d113dfb1a3b301438608c39be68adb5b359d00b0b39a77f7167a18c75532b18798aea17d6ea64bf798c3ec0438a1c94d3445f906985f3791a3f4a98df13cd0af24d8feba0dbeb08b42635327c31cff5b6da791ad1c33055b18fa6d8b5c7f454c39a70d1e86f96660cea737e7ca52014a6c662e346a09bc25e4027602b4472dc93ee06c91a698be675bb39d2319e91a083b92e4b7b0bcc38771977c300e889ae60bb38b97ca68836d0bcabde4b6a0d70042ee67e051a1503927c422593df1668bb789d7060cc6b09d0a891384d5bbe90b480ba93afa99a079ea85e28a0236437a0ba397705356b0f90757e754f34fb5a591b3888eb5d719b79f462abab333ed078bb90c4b11dec276713c4cb06bca9d98068f48b90802451f69d2acb08abe66c61f6081f008d04fa7
bad176da4201c9c2a3b3b29b3e7734b1961b6e2ea8d963350d8301a2dc48ce2e056156275cae8c23665b47a5a49c1abc5a01c98cee22a9dd7584c15b304e5c61cd449453208417075a6c3b999f626795379f1d556da168832a47ac638ce89a59c5a9c0a9746ad219142cf782280311b9cea87598c46ba673ac30a281a052dcd710e1bac328d822c19db8ee7e38925c7378431937b812e03382963b9c26653a1f5242faad7b37863433dc56538957f3c412102a17d28bc328c4781c566331f8c0b77093baef24a58d6312ddc719ac67ac2874f3adc8a3e6530adbc14cc069159a99e56277895c17c04da1644db23a6e9c16f31c21959400a8abd483a3fcfc0c5fd759917322a66a2aa77a6956f3b8387443640746b0ac8a282521dd784332d56aa745898c3fcc60a56a0716931bbe69b26c4514d529c79979355c8b40eb97fe7c485ceaa45d610145b4bce7da6343db46b6bf42182931a3ed98bafb66614e024cf8c9e51a90b1fc3702a2b4fe3b0c537fa9a1680b4d2f2044c557b1819300a6225be6c234d07d06a702eeb7110ef05c8973b0cab182efbb9ba07811b87b24e2a652cb428240c53423efcaf201973bf3342e86a8d477191d3544217f143586ba351fb7729ac8a51a51c8ab719fd3568c615a7a438b8967301754cac96a8552af82d8ce8840da56cb7481ad54581904c0d390732eceb23df4483cd7593d949bb0c985042f71018862b0d126702a7b55c8c7d9d44cfea157d4013c57ceb18bcfc2c95d8bb8d6178e0ac738ffcc1b3fca525a7ae83652e0b75836fe6c77d182626a8ca85262a17bc60645105a503b2f0f707e765552d49979273b0cb5870124933c6557ef795b36bf093f6c35ef722c9b2854999d20b5fd23dc6d2381ef38bcf547e37faa8ccda3dea2409deda7992a1951849ce3e7b11f3f98cb0d2283e458af854af9d74c57516a924e74222e9bcac529e88a02913d9ae29ec3cc42269d08ca1bf13a941f95d0bb05da9ac4a1ea2bb86b4c631853ec5f2129834a70ba923c8f1bc3fd5cad8692ef4401417b362f0e729497633794abc21e95a59319403002085b113a7b0544165210c1726346a088a933347a265ba055429e637fc40b111d38446461d77546166f923e5249427e5c62092b6ee2a2b585273c3d545b99673419194e54978d71ca606e238a053988db999904207b8326c5b27a38966c4d99460386c453d12821602b444320da205c980da3ab9d3461d405601a7226c143cf4492b8bf4c63a949a8ad81224c71005abcf6afb4ba7ba94ee437079494f9d78c69ac950711765a7e50ab42ba6bf64a5a7d30e64d14fb845ab37b4cc5b099c44e3cf4f9bc61f3640b5b98560474f9f1054dce9be10db77dea35a2375c66d0a26d7f73e7385b03ca8194dbaaf6
01184f826bd0a86b1d023ae9548b6d4602cd25c3f46e1c66dca6fd183007d043f6f3a6f5a56089180744d579bfe3cc65f003d91084adac4c0ac5811ea8a3e5aa6500eac125a423000297a7975585a083bcfc807b7722b5a0438c1b11b62abcb9a4623f8090c451690455acf97814074c0b6d6d19180f08afbd5ba0e259ee910a61f684d14e7996e47ba5a19994a13a642a1bb563411979bcaf7b302d70ba750a89867653b93596d03b260c7c0a024949bb7b1b110dc8267d5c5305390da26c7e6296add6ba7533b92540c28b337c5b392a6024c57cc09b899ec72e1466de604aa0c909baeb0b0078324e810481f760ae694b5e88cb034bca48bf70881047c7b6ab7ce04b96ad2bd0142b387bc1824a22742c7ce18ebac7744a616a5631e40ab817426c6130ece8641661ab863c44c2adb64029990aea24c94bec0ad7bfba46cd1894775ac6549b1a63446cade59357e125c589a73dd18d5e8aad6acb35a89e0958c3ae122197bb6fed165733ca120172d11335a4d60d73fb91d0ffac552692219ef3082477a0f6399aa5dce8a72fd0afaa3b627c9
Encapsulated ? : true
Cipher : 1d04afad6cf4058acb290f72298587c8afb9e022fc0a4b3e1aa5fdc79cfbe44e7781317adbc1f92fd01a6ad3840386710a369276c50671d2b58272505793736bb9d0e8883c200270ddae19fbc86af41aba366b4ddfd67f8771905b3fccca6da805a1e13a9e697500779cfe52484811e906042fa6e6e93ef641e5e7a46c39969c4683ee7cb440fc4cc452dab5215d6ec32a36fa0e8d7501b5d7dcc9dbfb51cbb1c036b052a7354544a6707099ded7b5e5c5024e2a6f356b2d300585128a30d7b964842d5c06659990c85468b42f5f2b46c39b4fa740a3f7006da01ffa09fb2fd6b5b0e9174bd7a801972b647df2825842b8ad146220a1ddcc9eee6967954e8d960bbf5ea8a74ae0306061c44e2995eb451171bd3eb4679579922e48e713ad40cddcd14343dc57a181e3067f1b01895122a447cf002b600c96a30c5f809efcc459cebc8723ca5b5147d2f9d09186f31bba013f19e63294cc5a57c0184b838cb9d51c62e0303c9a029cf6a5c489ccb43bd0bdd4da61f147d6ef9c2b95a758d0c2b9a9265e7cf4255989c07799940c517ecd527cca2acf62d104e2d45a176e35852d81f42397c93d3b2b1c7fde3cc6f4cd5d6c166f7312e34f690a07ecbaac69a045358564142422b45c58784cd5d2d69d9084b7e9f33176893bee2f1589725ed1a443f4b9095e97294f740e8471f468a51db85cc66176af022db77314579776b69eaa8594dbed5d0e0b549675e12c742913da76e3de732c24f7811d8ee32ade2ac1bcb8763c0e898a67695aaab9478c80dc29cc3ae9f1c4b63c116bda64e1e8727881ebe4c1db30219a87d7ff8805675b56a4907d9408bb96438a5182c66a47739f8b12cd5241b5f4e995f4f1fc85041eeaeb158d7ea9c1601a9b3849c6977137a0e82afb72b16748efa456fbae5b28ed82107d79dec3da87d0c0261267a3dbe9dcedb374d96fc00b7478b30f917b2312e7e79133923c2d9aba394bfcdbd00539f7d2d4fdecf9821fdb4c15f253e5ad80d10e360fb84b45e01415a4d5759cd5000ea5c4e80f60a887f9e8ad35ef7cabab83eeb59bf81b3bb10b440707e877c558ca9c80df8d3d8741b838ddf9a5e0e7826a1f6ee0c4f2241687ab0573b18814d21a668861962400148b45a24fdfeb3638a1f16b7c344b088cfffc851317753c1e0602bb0cbfb5357132baf29d6123862eb8b29229a5fd9b173ad4c1b098d11ff23f6ee1c7d357235e647dd99451162cfbed33b7d05df5578859538a9edbeae2cf8ac0903c36e7db352c147c11725a3c5c611b149a4c87e24589d9e31d30a9a8b2cdd863b8dd3ab8c90cde061426a2afedb4aff424cde10e70f1e38207d0fc8be467b4f063739d920bb1906144a704c7ba5be6645899270e5da6380dabfb16e7f906a1f4845
01005cb383692e054533697a63c8a2f8e1b891b37d5b23afef1de8f9a257f7c9577466fbd87223c5773795ac23ab4cfc0043a965e8695e764174bdc1c778d3d1d6e2a65d9cb7a4b1eb31ca818b0c8abe779fd61a34ee78cfc49fd7682
Shared secret : ee30e0696c36480afb066fa2971535f195a30ce08aacc3dfc182ed0947a44f3a
```
> [!CAUTION]
> Before you consider using Psuedo Random Number Generator which comes with this library implementation, I strongly advice you to go through [include/prng.hpp](./include/prng.hpp).
> Before you consider using the Pseudo Random Number Generator which comes with this library implementation, I strongly advise you to go through [include/ml_kem/internals/rng/prng.hpp](./include/ml_kem/internals/rng/prng.hpp).
> [!NOTE]
> Looking at API documentation, in header files, can give you good idea of how to use Kyber KEM API. Note, this library doesn't expose any raw pointer based interface, rather everything is wrapped under statically defined `std::span` - which one can easily create from `std::{array, vector}`. I opt for using statically defined `std::span` based function interfaces because we always know, at compile-time, how many bytes the seeds/ keys/ cipher-texts/ shared-secrets are, for various different Kyber KEM parameters. This gives much better type safety and compile-time error reporting.
> Looking at API documentation, in header files, can give you good idea of how to use ML-KEM API. Note, this library doesn't expose any raw pointer based interface, rather everything is wrapped under statically defined `std::span` - which one can easily create from `std::{array, vector}`. I opt for using statically defined `std::span` based function interfaces because we always know, at compile-time, how many bytes the seeds/ keys/ cipher-texts/ shared-secrets are, for various different ML-KEM parameters. This gives much better type safety and compile-time error reporting.

View File

@@ -1,216 +0,0 @@
#include "bench_helper.hpp"
#include "kem.hpp"
#include "x86_64_cpu_ticks.hpp"
#include <benchmark/benchmark.h>
// Measures the IND-CCA2-secure Kyber KEM key generation routine `kem::keygen<k, eta1>`.
//
// Seeds `d` and `z` are sampled from the PRNG once, before the timed loop; every
// iteration then regenerates the key pair from those same seeds. On x86_64 targets,
// the (integer) average of the per-iteration `cpu_ticks()` deltas is additionally
// published through the "rdtsc" counter.
template<size_t k, size_t eta1>
void
bench_keygen(benchmark::State& state)
{
  constexpr size_t seed_len = 32;
  constexpr size_t pubkey_len = kyber_utils::get_kem_public_key_len(k);
  constexpr size_t seckey_len = kyber_utils::get_kem_secret_key_len(k);

  // Heap-backed buffers; the fixed-extent spans below are what the KEM API consumes.
  std::vector<uint8_t> seed_d_buf(seed_len);
  std::vector<uint8_t> seed_z_buf(seed_len);
  std::vector<uint8_t> pubkey_buf(pubkey_len);
  std::vector<uint8_t> seckey_buf(seckey_len);

  std::span<uint8_t, seed_len> seed_d(seed_d_buf);
  std::span<uint8_t, seed_len> seed_z(seed_z_buf);
  std::span<uint8_t, pubkey_len> pubkey(pubkey_buf);
  std::span<uint8_t, seckey_len> seckey(seckey_buf);

  prng::prng_t rng;
  rng.read(seed_d);
  rng.read(seed_z);

#ifdef __x86_64__
  uint64_t ticks_sum = 0ul;
#endif

  for (auto _ : state) {
#ifdef __x86_64__
    const uint64_t tick_begin = cpu_ticks();
#endif

    kem::keygen<k, eta1>(seed_d, seed_z, pubkey, seckey);

    // Keep inputs and outputs observable so the call can't be elided.
    benchmark::DoNotOptimize(seed_d);
    benchmark::DoNotOptimize(seed_z);
    benchmark::DoNotOptimize(pubkey);
    benchmark::DoNotOptimize(seckey);
    benchmark::ClobberMemory();

#ifdef __x86_64__
    const uint64_t tick_end = cpu_ticks();
    ticks_sum += tick_end - tick_begin;
#endif
  }

  state.SetItemsProcessed(state.iterations());

#ifdef __x86_64__
  // Integer division on purpose: the reported value is a whole number of ticks.
  const uint64_t ticks_avg = ticks_sum / static_cast<uint64_t>(state.iterations());
  state.counters["rdtsc"] = static_cast<double>(ticks_avg);
#endif
}
// Measures the IND-CCA2-secure Kyber KEM encapsulation routine
// `kem::encapsulate<k, eta1, eta2, du, dv>`.
//
// A key pair is generated once up front from PRNG-sampled seeds `d`/`z`, and a seed
// `m` is sampled once as well. Each timed iteration encapsulates against that public
// key and squeezes the 32-byte sender key out of the returned KDF object. On x86_64
// targets, the (integer) average of the per-iteration `cpu_ticks()` deltas is
// additionally published through the "rdtsc" counter.
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
void
bench_encapsulate(benchmark::State& state)
{
  constexpr size_t seed_len = 32;
  constexpr size_t pubkey_len = kyber_utils::get_kem_public_key_len(k);
  constexpr size_t seckey_len = kyber_utils::get_kem_secret_key_len(k);
  constexpr size_t cipher_len = kyber_utils::get_kem_cipher_len(k, du, dv);
  constexpr size_t shared_key_len = 32;

  // Heap-backed buffers; the fixed-extent spans below are what the KEM API consumes.
  std::vector<uint8_t> seed_d_buf(seed_len);
  std::vector<uint8_t> seed_z_buf(seed_len);
  std::vector<uint8_t> seed_m_buf(seed_len);
  std::vector<uint8_t> pubkey_buf(pubkey_len);
  std::vector<uint8_t> seckey_buf(seckey_len);
  std::vector<uint8_t> cipher_buf(cipher_len);
  std::vector<uint8_t> sender_key_buf(shared_key_len);

  std::span<uint8_t, seed_len> seed_d(seed_d_buf);
  std::span<uint8_t, seed_len> seed_z(seed_z_buf);
  std::span<uint8_t, seed_len> seed_m(seed_m_buf);
  std::span<uint8_t, pubkey_len> pubkey(pubkey_buf);
  std::span<uint8_t, seckey_len> seckey(seckey_buf);
  std::span<uint8_t, cipher_len> cipher_text(cipher_buf);
  std::span<uint8_t, shared_key_len> sender_shared_key(sender_key_buf);

  // One-time setup, kept outside of the timed loop.
  prng::prng_t rng;
  rng.read(seed_d);
  rng.read(seed_z);
  kem::keygen<k, eta1>(seed_d, seed_z, pubkey, seckey);
  rng.read(seed_m);

#ifdef __x86_64__
  uint64_t ticks_sum = 0ul;
#endif

  for (auto _ : state) {
#ifdef __x86_64__
    const uint64_t tick_begin = cpu_ticks();
#endif

    auto sender_kdf = kem::encapsulate<k, eta1, eta2, du, dv>(seed_m, pubkey, cipher_text);
    benchmark::DoNotOptimize(sender_kdf);
    sender_kdf.squeeze(sender_shared_key);

    // Keep inputs and outputs observable so the calls can't be elided.
    benchmark::DoNotOptimize(seed_m);
    benchmark::DoNotOptimize(pubkey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(sender_shared_key);
    benchmark::ClobberMemory();

#ifdef __x86_64__
    const uint64_t tick_end = cpu_ticks();
    ticks_sum += tick_end - tick_begin;
#endif
  }

  state.SetItemsProcessed(state.iterations());

#ifdef __x86_64__
  // Integer division on purpose: the reported value is a whole number of ticks.
  const uint64_t ticks_avg = ticks_sum / static_cast<uint64_t>(state.iterations());
  state.counters["rdtsc"] = static_cast<double>(ticks_avg);
#endif
}
// Measures the IND-CCA2-secure Kyber KEM decapsulation routine
// `kem::decapsulate<k, eta1, eta2, du, dv>`.
//
// Key generation and encapsulation are executed once up front to obtain a secret key,
// a cipher text and the sender's 32-byte key. Each timed iteration decapsulates that
// cipher text and squeezes the receiver's key out of the returned KDF object. After
// the loop, an `assert` checks that sender and receiver keys agree. On x86_64 targets,
// the (integer) average of the per-iteration `cpu_ticks()` deltas is additionally
// published through the "rdtsc" counter.
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
void
bench_decapsulate(benchmark::State& state)
{
  constexpr size_t seed_len = 32;
  constexpr size_t pubkey_len = kyber_utils::get_kem_public_key_len(k);
  constexpr size_t seckey_len = kyber_utils::get_kem_secret_key_len(k);
  constexpr size_t cipher_len = kyber_utils::get_kem_cipher_len(k, du, dv);
  constexpr size_t shared_key_len = 32;

  // Heap-backed buffers; the fixed-extent spans below are what the KEM API consumes.
  std::vector<uint8_t> seed_d_buf(seed_len);
  std::vector<uint8_t> seed_z_buf(seed_len);
  std::vector<uint8_t> seed_m_buf(seed_len);
  std::vector<uint8_t> pubkey_buf(pubkey_len);
  std::vector<uint8_t> seckey_buf(seckey_len);
  std::vector<uint8_t> cipher_buf(cipher_len);
  std::vector<uint8_t> sender_key_buf(shared_key_len);
  std::vector<uint8_t> receiver_key_buf(shared_key_len);

  std::span<uint8_t, seed_len> seed_d(seed_d_buf);
  std::span<uint8_t, seed_len> seed_z(seed_z_buf);
  std::span<uint8_t, seed_len> seed_m(seed_m_buf);
  std::span<uint8_t, pubkey_len> pubkey(pubkey_buf);
  std::span<uint8_t, seckey_len> seckey(seckey_buf);
  std::span<uint8_t, cipher_len> cipher_text(cipher_buf);
  std::span<uint8_t, shared_key_len> sender_shared_key(sender_key_buf);
  std::span<uint8_t, shared_key_len> receiver_shared_key(receiver_key_buf);

  // One-time setup, kept outside of the timed loop.
  prng::prng_t rng;
  rng.read(seed_d);
  rng.read(seed_z);
  kem::keygen<k, eta1>(seed_d, seed_z, pubkey, seckey);
  rng.read(seed_m);
  auto sender_kdf = kem::encapsulate<k, eta1, eta2, du, dv>(seed_m, pubkey, cipher_text);
  sender_kdf.squeeze(sender_shared_key);

#ifdef __x86_64__
  uint64_t ticks_sum = 0ul;
#endif

  for (auto _ : state) {
#ifdef __x86_64__
    const uint64_t tick_begin = cpu_ticks();
#endif

    auto receiver_kdf = kem::decapsulate<k, eta1, eta2, du, dv>(seckey, cipher_text);
    benchmark::DoNotOptimize(receiver_kdf);
    receiver_kdf.squeeze(receiver_shared_key);

    // Keep inputs and outputs observable so the calls can't be elided.
    benchmark::DoNotOptimize(seckey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(receiver_shared_key);
    benchmark::ClobberMemory();

#ifdef __x86_64__
    const uint64_t tick_end = cpu_ticks();
    ticks_sum += tick_end - tick_begin;
#endif
  }

  state.SetItemsProcessed(state.iterations());
  // Sanity check: both parties must have arrived at the same shared key.
  assert(std::ranges::equal(sender_shared_key, receiver_shared_key));

#ifdef __x86_64__
  // Integer division on purpose: the reported value is a whole number of ticks.
  const uint64_t ticks_avg = ticks_sum / static_cast<uint64_t>(state.iterations());
  state.counters["rdtsc"] = static_cast<double>(ticks_avg);
#endif
}
// Register for benchmarking IND-CCA2-secure Kyber Key Encapsulation Mechanism
//
// Template arguments follow the order declared on the benchmark templates:
//   bench_keygen<k, eta1>
//   bench_encapsulate<k, eta1, eta2, du, dv>
//   bench_decapsulate<k, eta1, eta2, du, dv>
//
// Every registration also attaches two custom statistics, reported as "min" and "max",
// computed by `compute_min` / `compute_max` (not defined in this file - presumably they
// come from bench_helper.hpp; confirm).

// Kyber512: k = 2, eta1 = 3, eta2 = 2, du = 10, dv = 4
BENCHMARK(bench_keygen<2, 3>)->Name("kyber512/keygen")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
BENCHMARK(bench_encapsulate<2, 3, 2, 10, 4>)->Name("kyber512/encap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
BENCHMARK(bench_decapsulate<2, 3, 2, 10, 4>)->Name("kyber512/decap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
// Kyber768: k = 3, eta1 = 2, eta2 = 2, du = 10, dv = 4
BENCHMARK(bench_keygen<3, 2>)->Name("kyber768/keygen")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
BENCHMARK(bench_encapsulate<3, 2, 2, 10, 4>)->Name("kyber768/encap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
BENCHMARK(bench_decapsulate<3, 2, 2, 10, 4>)->Name("kyber768/decap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
// Kyber1024: k = 4, eta1 = 2, eta2 = 2, du = 11, dv = 5
BENCHMARK(bench_keygen<4, 2>)->Name("kyber1024/keygen")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
BENCHMARK(bench_encapsulate<4, 2, 2, 11, 5>)->Name("kyber1024/encap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);
BENCHMARK(bench_decapsulate<4, 2, 2, 11, 5>)->Name("kyber1024/decap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max);

View File

@@ -0,0 +1,111 @@
#include "bench_helper.hpp"
#include "ml_kem/ml_kem_1024.hpp"
#include <benchmark/benchmark.h>
#include <cassert>
// Benchmarks ML-KEM-1024 key generation.
//
// Both seeds are sampled once, before the timed loop, so every iteration runs
// key generation on the very same pair of seeds.
void
bench_ml_kem_1024_keygen(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_1024::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_1024::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_1024::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_1024::SKEY_BYTE_LEN> skey{};

  ml_kem_prng::prng_t<256> rng{};
  rng.read(d);
  rng.read(z);

  for (auto _ : state) {
    ml_kem_1024::keygen(d, z, pkey, skey);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(d);
    benchmark::DoNotOptimize(z);
    benchmark::DoNotOptimize(pkey);
    benchmark::DoNotOptimize(skey);
    benchmark::ClobberMemory();
  }

  state.SetItemsProcessed(state.iterations());
}
// Benchmarks ML-KEM-1024 encapsulation.
//
// The keypair and the seed `m` are prepared once, before the timed loop. The
// boolean returned by each encapsulation call is AND-ed into a single flag,
// which is asserted once the loop has finished.
void
bench_ml_kem_1024_encapsulate(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_1024::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_1024::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_1024::SEED_M_BYTE_LEN> m{};
  std::array<uint8_t, ml_kem_1024::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_1024::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_1024::CIPHER_TEXT_BYTE_LEN> cipher_text{};
  std::array<uint8_t, ml_kem_1024::SHARED_SECRET_BYTE_LEN> secret{};

  // Untimed setup
  ml_kem_prng::prng_t<256> rng{};
  rng.read(d);
  rng.read(z);
  rng.read(m);
  ml_kem_1024::keygen(d, z, pkey, skey);

  bool all_encapsulated = true;
  for (auto _ : state) {
    all_encapsulated &= ml_kem_1024::encapsulate(m, pkey, cipher_text, secret);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(all_encapsulated);
    benchmark::DoNotOptimize(m);
    benchmark::DoNotOptimize(pkey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(secret);
    benchmark::ClobberMemory();
  }

  assert(all_encapsulated);
  state.SetItemsProcessed(state.iterations());
}
// Benchmarks ML-KEM-1024 decapsulation.
//
// A keypair is generated and a cipher text is encapsulated once, before the
// timed loop; every iteration decapsulates that same cipher text. After the
// loop, sender's and receiver's shared secrets are asserted to be equal.
void
bench_ml_kem_1024_decapsulate(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_1024::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_1024::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_1024::SEED_M_BYTE_LEN> m{};
  std::array<uint8_t, ml_kem_1024::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_1024::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_1024::CIPHER_TEXT_BYTE_LEN> cipher_text{};
  std::array<uint8_t, ml_kem_1024::SHARED_SECRET_BYTE_LEN> sender_secret{};
  std::array<uint8_t, ml_kem_1024::SHARED_SECRET_BYTE_LEN> receiver_secret{};

  // Untimed setup
  ml_kem_prng::prng_t<256> rng{};
  rng.read(d);
  rng.read(z);
  rng.read(m);
  ml_kem_1024::keygen(d, z, pkey, skey);
  (void)ml_kem_1024::encapsulate(m, pkey, cipher_text, sender_secret);

  for (auto _ : state) {
    ml_kem_1024::decapsulate(skey, cipher_text, receiver_secret);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(skey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(receiver_secret);
    benchmark::ClobberMemory();
  }

  state.SetItemsProcessed(state.iterations());
  assert(sender_secret == receiver_secret);
}
// Registers ML-KEM-1024 benchmarks; each one also reports "min" and "max"
// statistics, computed by `compute_min` and `compute_max` respectively.
BENCHMARK(bench_ml_kem_1024_keygen)
  ->Name("ml_kem_1024/keygen")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);
BENCHMARK(bench_ml_kem_1024_encapsulate)
  ->Name("ml_kem_1024/encap")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);
BENCHMARK(bench_ml_kem_1024_decapsulate)
  ->Name("ml_kem_1024/decap")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);

View File

@@ -0,0 +1,111 @@
#include "bench_helper.hpp"
#include "ml_kem/ml_kem_512.hpp"
#include <benchmark/benchmark.h>
#include <cassert>
// Benchmarks ML-KEM-512 key generation.
//
// Both seeds are sampled once, before the timed loop, so every iteration runs
// key generation on the very same pair of seeds.
void
bench_ml_kem_512_keygen(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_512::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_512::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_512::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_512::SKEY_BYTE_LEN> skey{};

  ml_kem_prng::prng_t<128> rng{};
  rng.read(d);
  rng.read(z);

  for (auto _ : state) {
    ml_kem_512::keygen(d, z, pkey, skey);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(d);
    benchmark::DoNotOptimize(z);
    benchmark::DoNotOptimize(pkey);
    benchmark::DoNotOptimize(skey);
    benchmark::ClobberMemory();
  }

  state.SetItemsProcessed(state.iterations());
}
// Benchmarks ML-KEM-512 encapsulation.
//
// The keypair and the seed `m` are prepared once, before the timed loop. The
// boolean returned by each encapsulation call is AND-ed into a single flag,
// which is asserted once the loop has finished.
void
bench_ml_kem_512_encapsulate(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_512::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_512::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_512::SEED_M_BYTE_LEN> m{};
  std::array<uint8_t, ml_kem_512::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_512::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_512::CIPHER_TEXT_BYTE_LEN> cipher_text{};
  std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> secret{};

  // Untimed setup
  ml_kem_prng::prng_t<128> rng{};
  rng.read(d);
  rng.read(z);
  rng.read(m);
  ml_kem_512::keygen(d, z, pkey, skey);

  bool all_encapsulated = true;
  for (auto _ : state) {
    all_encapsulated &= ml_kem_512::encapsulate(m, pkey, cipher_text, secret);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(all_encapsulated);
    benchmark::DoNotOptimize(m);
    benchmark::DoNotOptimize(pkey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(secret);
    benchmark::ClobberMemory();
  }

  assert(all_encapsulated);
  state.SetItemsProcessed(state.iterations());
}
// Benchmarks ML-KEM-512 decapsulation.
//
// A keypair is generated and a cipher text is encapsulated once, before the
// timed loop; every iteration decapsulates that same cipher text. After the
// loop, sender's and receiver's shared secrets are asserted to be equal.
void
bench_ml_kem_512_decapsulate(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_512::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_512::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_512::SEED_M_BYTE_LEN> m{};
  std::array<uint8_t, ml_kem_512::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_512::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_512::CIPHER_TEXT_BYTE_LEN> cipher_text{};
  std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> sender_secret{};
  std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> receiver_secret{};

  // Untimed setup
  ml_kem_prng::prng_t<128> rng{};
  rng.read(d);
  rng.read(z);
  rng.read(m);
  ml_kem_512::keygen(d, z, pkey, skey);
  (void)ml_kem_512::encapsulate(m, pkey, cipher_text, sender_secret);

  for (auto _ : state) {
    ml_kem_512::decapsulate(skey, cipher_text, receiver_secret);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(skey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(receiver_secret);
    benchmark::ClobberMemory();
  }

  state.SetItemsProcessed(state.iterations());
  assert(sender_secret == receiver_secret);
}
// Registers ML-KEM-512 benchmarks; each one also reports "min" and "max"
// statistics, computed by `compute_min` and `compute_max` respectively.
BENCHMARK(bench_ml_kem_512_keygen)
  ->Name("ml_kem_512/keygen")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);
BENCHMARK(bench_ml_kem_512_encapsulate)
  ->Name("ml_kem_512/encap")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);
BENCHMARK(bench_ml_kem_512_decapsulate)
  ->Name("ml_kem_512/decap")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);

View File

@@ -0,0 +1,111 @@
#include "bench_helper.hpp"
#include "ml_kem/ml_kem_768.hpp"
#include <benchmark/benchmark.h>
#include <cassert>
// Benchmarks ML-KEM-768 key generation.
//
// Both seeds are sampled once, before the timed loop, so every iteration runs
// key generation on the very same pair of seeds.
void
bench_ml_kem_768_keygen(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_768::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_768::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_768::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_768::SKEY_BYTE_LEN> skey{};

  ml_kem_prng::prng_t<192> rng{};
  rng.read(d);
  rng.read(z);

  for (auto _ : state) {
    ml_kem_768::keygen(d, z, pkey, skey);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(d);
    benchmark::DoNotOptimize(z);
    benchmark::DoNotOptimize(pkey);
    benchmark::DoNotOptimize(skey);
    benchmark::ClobberMemory();
  }

  state.SetItemsProcessed(state.iterations());
}
// Benchmarks ML-KEM-768 encapsulation.
//
// The keypair and the seed `m` are prepared once, before the timed loop. The
// boolean returned by each encapsulation call is AND-ed into a single flag,
// which is asserted once the loop has finished.
void
bench_ml_kem_768_encapsulate(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_768::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_768::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_768::SEED_M_BYTE_LEN> m{};
  std::array<uint8_t, ml_kem_768::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_768::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_768::CIPHER_TEXT_BYTE_LEN> cipher_text{};
  std::array<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN> secret{};

  // Untimed setup
  ml_kem_prng::prng_t<192> rng{};
  rng.read(d);
  rng.read(z);
  rng.read(m);
  ml_kem_768::keygen(d, z, pkey, skey);

  bool all_encapsulated = true;
  for (auto _ : state) {
    all_encapsulated &= ml_kem_768::encapsulate(m, pkey, cipher_text, secret);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(all_encapsulated);
    benchmark::DoNotOptimize(m);
    benchmark::DoNotOptimize(pkey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(secret);
    benchmark::ClobberMemory();
  }

  assert(all_encapsulated);
  state.SetItemsProcessed(state.iterations());
}
// Benchmarks ML-KEM-768 decapsulation.
//
// A keypair is generated and a cipher text is encapsulated once, before the
// timed loop; every iteration decapsulates that same cipher text. After the
// loop, sender's and receiver's shared secrets are asserted to be equal.
void
bench_ml_kem_768_decapsulate(benchmark::State& state)
{
  std::array<uint8_t, ml_kem_768::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_768::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_768::SEED_M_BYTE_LEN> m{};
  std::array<uint8_t, ml_kem_768::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_768::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_768::CIPHER_TEXT_BYTE_LEN> cipher_text{};
  std::array<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN> sender_secret{};
  std::array<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN> receiver_secret{};

  // Untimed setup
  ml_kem_prng::prng_t<192> rng{};
  rng.read(d);
  rng.read(z);
  rng.read(m);
  ml_kem_768::keygen(d, z, pkey, skey);
  (void)ml_kem_768::encapsulate(m, pkey, cipher_text, sender_secret);

  for (auto _ : state) {
    ml_kem_768::decapsulate(skey, cipher_text, receiver_secret);

    // Keep inputs and outputs observable, so that the call can't be elided.
    benchmark::DoNotOptimize(skey);
    benchmark::DoNotOptimize(cipher_text);
    benchmark::DoNotOptimize(receiver_secret);
    benchmark::ClobberMemory();
  }

  state.SetItemsProcessed(state.iterations());
  assert(sender_secret == receiver_secret);
}
// Registers ML-KEM-768 benchmarks; each one also reports "min" and "max"
// statistics, computed by `compute_min` and `compute_max` respectively.
BENCHMARK(bench_ml_kem_768_keygen)
  ->Name("ml_kem_768/keygen")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);
BENCHMARK(bench_ml_kem_768_encapsulate)
  ->Name("ml_kem_768/encap")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);
BENCHMARK(bench_ml_kem_768_decapsulate)
  ->Name("ml_kem_768/decap")
  ->ComputeStatistics("min", compute_min)
  ->ComputeStatistics("max", compute_max);

View File

@@ -1,27 +0,0 @@
#pragma once
#ifdef __x86_64__
#include <cstdint>
#include <emmintrin.h>
#include <x86intrin.h>
// Reads the x86_64 time-stamp counter, so that the number of CPU ticks spent
// on a region of code can be measured by sampling it on both sides, like
//
// start = cpu_ticks()
// {
// ... bunch
// ... of
// ... instructions
// }
// end = cpu_ticks()
//
// CPU ticks spent executing above code block = end - start
static inline uint64_t
cpu_ticks()
{
  // Memory fence is issued first, time-stamp counter is read only after that.
  _mm_mfence();

  const uint64_t ticks_now = __rdtsc();
  return ticks_now;
}
#endif

View File

@@ -1,80 +0,0 @@
#include "kyber512_kem.hpp"
#include <algorithm>
#include <cassert>
#include <iostream>
// Compile it with
//
// g++ -std=c++20 -Wall -Wextra -pedantic -O3 -march=native -I ./include -I ./sha3/include -I ./subtle/include/ examples/kyber512_kem.cpp
int
main()
{
constexpr size_t SEED_LEN = 32;
constexpr size_t KEY_LEN = 32;
// seeds required for keypair generation
std::vector<uint8_t> d(SEED_LEN, 0);
std::vector<uint8_t> z(SEED_LEN, 0);
auto _d = std::span<uint8_t, SEED_LEN>(d);
auto _z = std::span<uint8_t, SEED_LEN>(z);
// public/ private keypair
std::vector<uint8_t> pkey(kyber512_kem::PKEY_LEN, 0);
std::vector<uint8_t> skey(kyber512_kem::SKEY_LEN, 0);
auto _pkey = std::span<uint8_t, kyber512_kem::PKEY_LEN>(pkey);
auto _skey = std::span<uint8_t, kyber512_kem::SKEY_LEN>(skey);
// seed required for key encapsulation
std::vector<uint8_t> m(SEED_LEN, 0);
std::vector<uint8_t> cipher(kyber512_kem::CIPHER_LEN, 0);
auto _m = std::span<uint8_t, SEED_LEN>(m);
auto _cipher = std::span<uint8_t, kyber512_kem::CIPHER_LEN>(cipher);
// shared secret that sender/ receiver arrives at
std::vector<uint8_t> shrd_key0(KEY_LEN, 0);
std::vector<uint8_t> shrd_key1(KEY_LEN, 0);
auto _shrd_key0 = std::span<uint8_t, KEY_LEN>(shrd_key0);
auto _shrd_key1 = std::span<uint8_t, KEY_LEN>(shrd_key1);
// pseudo-randomness source
prng::prng_t prng;
// fill up seeds using PRNG
prng.read(_d);
prng.read(_z);
// generate a keypair
kyber512_kem::keygen(_d, _z, _pkey, _skey);
// fill up seed required for key encapsulation, using PRNG
prng.read(_m);
// encapsulate key, compute cipher text and obtain KDF
auto skdf = kyber512_kem::encapsulate(_m, _pkey, _cipher);
// decapsulate cipher text and obtain KDF
auto rkdf = kyber512_kem::decapsulate(_skey, _cipher);
// both sender's and receiver's KDF should produce same KEY_LEN many bytes
skdf.squeeze(_shrd_key0);
rkdf.squeeze(_shrd_key1);
// check that both of the communicating parties arrived at same shared key
assert(std::ranges::equal(_shrd_key0, _shrd_key1));
{
using namespace kyber_utils;
std::cout << "Kyber512 KEM\n";
std::cout << "\npubkey : " << to_hex(_pkey);
std::cout << "\nseckey : " << to_hex(_skey);
std::cout << "\ncipher : " << to_hex(_cipher);
std::cout << "\nshared secret : " << to_hex(_shrd_key0);
std::cout << "\n";
}
return EXIT_SUCCESS;
}

85
examples/ml_kem_768.cpp Normal file
View File

@@ -0,0 +1,85 @@
#include "ml_kem/ml_kem_768.hpp"
#include <algorithm>
#include <cassert>
#include <iomanip>
#include <iostream>
#include <sstream>
// Converts a byte sequence of length N into its lowercase, zero-padded hexadecimal string representation of length 2*N | N >= 0.
static inline std::string
to_hex(std::span<const uint8_t> bytes)
{
  static constexpr char hex_digits[] = "0123456789abcdef";

  std::string hex;
  hex.reserve(bytes.size() * 2);

  for (const uint8_t byte : bytes) {
    // High nibble first, then low nibble: two characters per byte.
    hex.push_back(hex_digits[byte >> 4]);
    hex.push_back(hex_digits[byte & 0x0f]);
  }

  return hex;
}
// Compile it with
//
// g++ -std=c++20 -Wall -Wextra -pedantic -O3 -march=native -I ./include -I ./sha3/include -I ./subtle/include/ examples/ml_kem_768.cpp
int
main()
{
  // Walks through one complete ML-KEM-768 session: key generation, encapsulation
  // and decapsulation. Then prints the resulting byte strings as hex.

  // Seeds required for keypair generation
  std::vector<uint8_t> d(ml_kem_768::SEED_D_BYTE_LEN, 0);
  std::vector<uint8_t> z(ml_kem_768::SEED_Z_BYTE_LEN, 0);
  auto d_span = std::span<uint8_t, ml_kem_768::SEED_D_BYTE_LEN>(d);
  auto z_span = std::span<uint8_t, ml_kem_768::SEED_Z_BYTE_LEN>(z);

  // Public/ private keypair
  std::vector<uint8_t> pkey(ml_kem_768::PKEY_BYTE_LEN, 0);
  std::vector<uint8_t> skey(ml_kem_768::SKEY_BYTE_LEN, 0);
  auto pkey_span = std::span<uint8_t, ml_kem_768::PKEY_BYTE_LEN>(pkey);
  auto skey_span = std::span<uint8_t, ml_kem_768::SKEY_BYTE_LEN>(skey);

  // Seed required for key encapsulation
  std::vector<uint8_t> m(ml_kem_768::SEED_M_BYTE_LEN, 0);
  std::vector<uint8_t> cipher(ml_kem_768::CIPHER_TEXT_BYTE_LEN, 0);
  auto m_span = std::span<uint8_t, ml_kem_768::SEED_M_BYTE_LEN>(m);
  auto cipher_span = std::span<uint8_t, ml_kem_768::CIPHER_TEXT_BYTE_LEN>(cipher);

  // Shared secret that sender/ receiver arrives at
  std::vector<uint8_t> sender_key(ml_kem_768::SHARED_SECRET_BYTE_LEN, 0);
  std::vector<uint8_t> receiver_key(ml_kem_768::SHARED_SECRET_BYTE_LEN, 0);
  auto sender_key_span = std::span<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN>(sender_key);
  auto receiver_key_span = std::span<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN>(receiver_key);

  // Pseudo-randomness source, instantiated at 192 -bit security level, which is
  // the level ML-KEM-768 benchmarks of this project pair with this parameter
  // set ( ML-KEM-512 -> 128, ML-KEM-768 -> 192, ML-KEM-1024 -> 256 ).
  ml_kem_prng::prng_t<192> prng{};

  // Fill up seeds using PRNG
  prng.read(d_span);
  prng.read(z_span);

  // Generate a keypair
  ml_kem_768::keygen(d_span, z_span, pkey_span, skey_span);

  // Fill up seed required for key encapsulation, using PRNG
  prng.read(m_span);

  // Encapsulate: computes cipher text and writes sender's shared secret. Returned
  // boolean is printed below as "Encapsulated ?".
  const bool is_encapsulated = ml_kem_768::encapsulate(m_span, pkey_span, cipher_span, sender_key_span);

  // Decapsulate: writes receiver's shared secret
  ml_kem_768::decapsulate(skey_span, cipher_span, receiver_key_span);

  // Check that both of the communicating parties arrived at same shared secret key
  assert(std::ranges::equal(sender_key_span, receiver_key_span));

  std::cout << "ML-KEM-768\n";
  std::cout << "Pubkey : " << to_hex(pkey_span) << "\n";
  std::cout << "Seckey : " << to_hex(skey_span) << "\n";
  std::cout << "Encapsulated ? : " << std::boolalpha << is_encapsulated << "\n";
  std::cout << "Cipher : " << to_hex(cipher_span) << "\n";
  std::cout << "Shared secret : " << to_hex(sender_key_span) << "\n";

  return EXIT_SUCCESS;
}

View File

@@ -1,81 +0,0 @@
#pragma once
#include "field.hpp"
#include "ntt.hpp"
#include "params.hpp"
#include <span>
// IND-CPA-secure Public Key Encryption Scheme Utilities
namespace kyber_utils {
// Given an element x ∈ Z_q | q = 3329, this routine compresses it by discarding
// some low-order bits, computing y ∈ [0, 2^d) | d < round(log2(q))
//
// See top of page 5 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// Following implementation collects inspiration from https://github.com/FiloSottile/mlkem768/blob/cffbfb96c407b3cfc9f6e1749475b673794402c1/mlkem768.go#L395-L425.
//
// Implementation uses neither a division instruction nor a branch: quotient of
// (x << d) / Q is estimated with a multiply-and-shift, and is then corrected
// by inspecting the sign bit of two subtractions.
template<size_t d>
static inline constexpr field::zq_t
compress(const field::zq_t x)
requires(kyber_params::check_d(d))
{
// Only low d bits of the final quotient are kept.
constexpr uint16_t mask = (1u << d) - 1;
const auto dividend = x.raw() << d;
// Estimate of dividend / Q.
// NOTE(review): presumably field::R = floor(2^(2 * RADIX_BIT_WIDTH) / Q) - confirm in field.hpp
const auto quotient0 = static_cast<uint32_t>((static_cast<uint64_t>(dividend) * field::R) >> (field::RADIX_BIT_WIDTH * 2));
const auto remainder = dividend - quotient0 * field::Q;
// Bit 31 of each difference is set when the subtraction wraps around, i.e.
// when remainder exceeds Q/2 ( resp. Q + Q/2 ); quotient is bumped by that bit.
// NOTE(review): assumes these operands are 32 -bit unsigned values - confirm type of x.raw()
const auto quotient1 = quotient0 + ((((field::Q / 2) - remainder) >> 31) & 1);
const auto quotient2 = quotient1 + (((field::Q + (field::Q / 2) - remainder) >> 31) & 1);
return field::zq_t(static_cast<uint16_t>(quotient2) & mask);
}
// Given an element x ∈ [0, 2^d) | d < round(log2(q)), this routine decompresses
// it back to y ∈ Z_q | q = 3329
//
// This routine recovers the compressed element with error probability as
// defined in eq. 2 of Kyber specification.
//
// See top of page 5 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// Computes (Q * x + 2^(d-1)) >> d, i.e. Q * x / 2^d rounded to nearest integer.
template<size_t d>
static inline constexpr field::zq_t
decompress(const field::zq_t x)
requires(kyber_params::check_d(d))
{
// t0 = 2^d, t1 = 2^(d-1) is the rounding offset added before the shift.
constexpr uint32_t t0 = 1u << d;
constexpr uint32_t t1 = t0 >> 1;
const uint32_t t2 = field::Q * x.raw();
const uint32_t t3 = t2 + t1;
const uint16_t t4 = static_cast<uint16_t>(t3 >> d);
return field::zq_t(t4);
}
// Utility function to compress each of 256 coefficients of a degree-255
// polynomial s.t. input polynomial is mutated.
// Applies `compress<d>` on each coefficient, in-place.
template<size_t d>
static inline constexpr void
poly_compress(std::span<field::zq_t, ntt::N> poly)
requires(kyber_params::check_d(d))
{
for (size_t i = 0; i < poly.size(); i++) {
poly[i] = compress<d>(poly[i]);
}
}
// Utility function to decompress each of 256 coefficients of a degree-255
// polynomial s.t. input polynomial is mutated.
// Applies `decompress<d>` on each coefficient, in-place.
template<size_t d>
static inline constexpr void
poly_decompress(std::span<field::zq_t, ntt::N> poly)
requires(kyber_params::check_d(d))
{
for (size_t i = 0; i < poly.size(); i++) {
poly[i] = decompress<d>(poly[i]);
}
}
}

View File

@@ -1,207 +0,0 @@
#pragma once
#include "pke.hpp"
#include "sha3_256.hpp"
#include "sha3_512.hpp"
#include "shake256.hpp"
#include "utils.hpp"
#include <array>
#include <cstdint>
// IND-CCA2-secure Key Encapsulation Mechanism
namespace kem {
// Kyber CCAKEM key generation algorithm, which takes two parameters `k` & `η1`
// ( read eta1 ) and generates byte serialized public key and secret key of
// following length
//
// public key: (k * 12 * 32 + 32) -bytes wide
// secret key: (k * 24 * 32 + 96) -bytes wide [ includes public key ]
//
// See algorithm 7 defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// Note, this routine allows you to pass two 32 -bytes seeds ( see first &
// second parameter ), which is designed this way for ease of writing test cases
// against known answer tests, obtained from Kyber reference implementation
// https://github.com/pq-crystals/kyber.git. It also helps in properly
// benchmarking underlying KEM's key generation implementation.
//
// Layout of the serialized secret key, as written by this routine
//
// CPAPKE secret key ( k * 12 * 32 -bytes ) || public key || SHA3-256(public key) ( 32 -bytes ) || z ( 32 -bytes )
template<size_t k, size_t eta1>
static inline void
keygen(std::span<const uint8_t, 32> d, // used in CPA-PKE
std::span<const uint8_t, 32> z, // used in CCA-KEM
std::span<uint8_t, kyber_utils::get_kem_public_key_len(k)> pubkey,
std::span<uint8_t, kyber_utils::get_kem_secret_key_len(k)> seckey)
requires(kyber_params::check_keygen_params(k, eta1))
{
// Byte offsets at which the second, third and fourth segments of the secret key begin
constexpr size_t skoff0 = k * 12 * 32;
constexpr size_t skoff1 = skoff0 + pubkey.size();
constexpr size_t skoff2 = skoff1 + 32;
auto _seckey0 = seckey.template subspan<0, skoff0>();
auto _seckey1 = seckey.template subspan<skoff0, skoff1 - skoff0>();
auto _seckey2 = seckey.template subspan<skoff1, skoff2 - skoff1>();
auto _seckey3 = seckey.template subspan<skoff2, seckey.size() - skoff2>();
pke::keygen<k, eta1>(d, pubkey, _seckey0); // CPAPKE key generation
std::copy(pubkey.begin(), pubkey.end(), _seckey1.begin());
std::copy(z.begin(), z.end(), _seckey3.begin());
// hash public key
sha3_256::sha3_256_t hasher;
hasher.absorb(pubkey);
hasher.finalize();
hasher.digest(_seckey2);
}
// Given (k * 12 * 32 + 32) -bytes public key and 32 -bytes seed ( used for
// deriving 32 -bytes message & 32 -bytes random coin ), this routine computes
// cipher text of length (k * du * 32 + dv * 32) -bytes which can be shared with
// recipient party ( having respective secret key ) over insecure channel.
//
// It also returns a SHAKE256 object which acts as a KDF ( key derivation
// function ), used for generating arbitrary length shared secret key, to be
// used for symmetric key encryption between these two participating entities.
//
// Other side of communication should also be able to generate same arbitrary
// length key stream ( using KDF ), after successful decryption of cipher text.
//
// See algorithm 8 defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// Note, this routine allows you to pass 32 -bytes seed ( see first parameter ),
// which is designed this way for ease of writing test cases against known
// answer tests, obtained from Kyber reference implementation
// https://github.com/pq-crystals/kyber.git. It also helps in properly
// benchmarking underlying KEM's encapsulation implementation.
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
static inline shake256::shake256_t
encapsulate(std::span<const uint8_t, 32> m,
std::span<const uint8_t, kyber_utils::get_kem_public_key_len(k)> pubkey,
std::span<uint8_t, kyber_utils::get_kem_cipher_len(k, du, dv)> cipher)
requires(kyber_params::check_encap_params(k, eta1, eta2, du, dv))
{
std::array<uint8_t, 64> g_in{};
std::array<uint8_t, 64> g_out{};
std::array<uint8_t, 64> kdf_in{};
auto _g_in = std::span(g_in);
auto _g_out = std::span(g_out);
auto _kdf_in = std::span(kdf_in);
// Each 64 -bytes buffer is addressed as two 32 -bytes halves
auto _g_in0 = _g_in.template subspan<0, 32>();
auto _g_in1 = _g_in.template subspan<_g_in0.size(), 32>();
auto _g_out0 = _g_out.template subspan<0, 32>();
auto _g_out1 = _g_out.template subspan<_g_out0.size(), 32>();
auto _kdf_in0 = _kdf_in.template subspan<0, 32>();
auto _kdf_in1 = _kdf_in.template subspan<_kdf_in0.size(), 32>();
// g_in = SHA3-256(m) || SHA3-256(pubkey)
sha3_256::sha3_256_t h256;
h256.absorb(m);
h256.finalize();
h256.digest(_g_in0);
h256.reset();
h256.absorb(pubkey);
h256.finalize();
h256.digest(_g_in1);
// hasher is reset here, because it's reused below for hashing the cipher text
h256.reset();
// g_out = SHA3-512(g_in)
sha3_512::sha3_512_t h512;
h512.absorb(_g_in);
h512.finalize();
h512.digest(_g_out);
// NOTE(review): presumably _g_in0 is the message and _g_out1 the random coin - confirm against pke::encrypt
pke::encrypt<k, eta1, eta2, du, dv>(pubkey, _g_in0, _g_out1, cipher);
// kdf_in = first half of g_out || SHA3-256(cipher)
std::copy(_g_out0.begin(), _g_out0.end(), _kdf_in0.begin());
h256.absorb(cipher);
h256.finalize();
h256.digest(_kdf_in1);
// Returned SHAKE256 instance has absorbed kdf_in and is finalized, caller squeezes from it
shake256::shake256_t xof256;
xof256.absorb(_kdf_in);
xof256.finalize();
return xof256;
}
// Given (k * 24 * 32 + 96) -bytes secret key and (k * du * 32 + dv * 32) -bytes
// encrypted ( cipher ) text, this routine recovers 32 -bytes plain text which
// was encrypted by sender, using respective public key, associated with this
// secret key.
// Recovered 32 -bytes plain text is used for deriving same key stream ( using
// SHAKE256 key derivation function ), which is the shared secret key between
// two communicating parties, over insecure channel. Using returned KDF (
// SHAKE256 object ) both parties can reach to same shared secret key ( of
// arbitrary length ), which will be used for encrypting traffic using symmetric
// key primitives.
//
// See algorithm 9 defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
static inline shake256::shake256_t
decapsulate(std::span<const uint8_t, kyber_utils::get_kem_secret_key_len(k)> seckey, std::span<const uint8_t, kyber_utils::get_kem_cipher_len(k, du, dv)> cipher)
requires(kyber_params::check_decap_params(k, eta1, eta2, du, dv))
{
constexpr size_t sklen = k * 12 * 32;
constexpr size_t pklen = k * 12 * 32 + 32;
constexpr size_t ctlen = cipher.size();
constexpr size_t skoff0 = sklen;
constexpr size_t skoff1 = skoff0 + pklen;
constexpr size_t skoff2 = skoff1 + 32;
// Secret key is split as: CPAPKE secret key || public key || h ( 32 -bytes ) || z ( 32 -bytes )
auto pke_sk = seckey.template subspan<0, skoff0>();
auto pubkey = seckey.template subspan<skoff0, skoff1 - skoff0>();
auto h = seckey.template subspan<skoff1, skoff2 - skoff1>();
auto z = seckey.template subspan<skoff2, seckey.size() - skoff2>();
std::array<uint8_t, 64> g_in{};
std::array<uint8_t, 64> g_out{};
std::array<uint8_t, cipher.size()> c_prime{};
std::array<uint8_t, 64> kdf_in{};
auto _g_in = std::span(g_in);
auto _g_out = std::span(g_out);
auto _kdf_in = std::span(kdf_in);
// Each 64 -bytes buffer is addressed as two 32 -bytes halves
auto _g_in0 = _g_in.template subspan<0, 32>();
auto _g_in1 = _g_in.template subspan<_g_in0.size(), 32>();
auto _g_out0 = _g_out.template subspan<0, 32>();
auto _g_out1 = _g_out.template subspan<_g_out0.size(), 32>();
auto _kdf_in0 = _kdf_in.template subspan<0, 32>();
auto _kdf_in1 = _kdf_in.template subspan<_kdf_in0.size(), 32>();
// g_in = decrypted 32 -bytes plain text || h ( taken from secret key )
pke::decrypt<k, du, dv>(pke_sk, cipher, _g_in0);
std::copy(h.begin(), h.end(), _g_in1.begin());
// g_out = SHA3-512(g_in)
sha3_512::sha3_512_t h512;
h512.absorb(_g_in);
h512.finalize();
h512.digest(_g_out);
// Re-encrypt the recovered plain text, result is compared against received cipher text
pke::encrypt<k, eta1, eta2, du, dv>(pubkey, _g_in0, _g_out1, c_prime);
// line 7-11 of algorithm 9, in constant-time
// NOTE(review): presumably first half of g_out is selected when both cipher texts match, z otherwise - confirm against ct_cond_memcpy
using kdf_t = std::span<const uint8_t, 32>;
const uint32_t cond = kyber_utils::ct_memcmp(cipher, std::span<const uint8_t, ctlen>(c_prime));
kyber_utils::ct_cond_memcpy(cond, _kdf_in0, kdf_t(_g_out0), kdf_t(z));
// Second half of kdf_in = SHA3-256(cipher)
sha3_256::sha3_256_t h256;
h256.absorb(cipher);
h256.finalize();
h256.digest(_kdf_in1);
// Returned SHAKE256 instance has absorbed kdf_in and is finalized, caller squeezes from it
shake256::shake256_t xof256;
xof256.absorb(_kdf_in);
xof256.finalize();
return xof256;
}
}

View File

@@ -1,61 +0,0 @@
#pragma once
#include "kem.hpp"
#include "utils.hpp"
// Kyber Key Encapsulation Mechanism (KEM) instantiated with Kyber1024
// parameters
namespace kyber1024_kem {
// See row 3 of table 1 of specification @
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// These five values are forwarded as template arguments to the generic `kem::`
// routines: keygen receives (k, η1); encapsulate/ decapsulate receive all five.
constexpr size_t k = 4;
constexpr size_t η1 = 2;
constexpr size_t η2 = 2;
constexpr size_t du = 11;
constexpr size_t dv = 5;
// = 1568 -bytes Kyber1024 public key
constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(k);
// = 3168 -bytes Kyber1024 secret key
constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(k);
// = 1568 -bytes Kyber1024 cipher text length
constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(k, du, dv);
// Computes a new Kyber1024 KEM keypair s.t. public key is 1568 -bytes and
// secret key is 3168 -bytes, given 32 -bytes seed d ( used in CPA-PKE ) and 32
// -bytes seed z ( used in CCA-KEM ).
//
// Keypair is written into caller provided `pubkey` and `seckey` buffers.
inline void
keygen(std::span<const uint8_t, 32> d, std::span<const uint8_t, 32> z, std::span<uint8_t, PKEY_LEN> pubkey, std::span<uint8_t, SKEY_LEN> seckey)
{
kem::keygen<k, η1>(d, z, pubkey, seckey);
}
// Given 32 -bytes seed m ( which is used during encapsulation ) and a Kyber1024
// KEM public key ( of 1568 -bytes ), this routine computes a SHAKE256 XOF
// backed KDF (key derivation function) and 1568 -bytes of cipher text, which
// can only be decrypted by corresponding Kyber1024 KEM secret key, for arriving
// at same SHAKE256 XOF backed KDF.
//
// Cipher text is written into caller provided `cipher` buffer.
// Returned KDF can be used for deriving shared key of arbitrary bytes length.
inline shake256::shake256_t
encapsulate(std::span<const uint8_t, 32> m, std::span<const uint8_t, PKEY_LEN> pubkey, std::span<uint8_t, CIPHER_LEN> cipher)
{
return kem::encapsulate<k, η1, η2, du, dv>(m, pubkey, cipher);
}
// Given a Kyber1024 KEM secret key ( of 3168 -bytes ) and a cipher text of 1568
// -bytes, which holds encrypted ( using corresponding Kyber1024 KEM public key
// ) 32 -bytes seed, this routine computes a SHAKE256 XOF backed KDF (key
// derivation function).
//
// Returned KDF can be used for deriving shared key of arbitrary bytes length.
inline shake256::shake256_t
decapsulate(std::span<const uint8_t, SKEY_LEN> seckey, std::span<const uint8_t, CIPHER_LEN> cipher)
{
return kem::decapsulate<k, η1, η2, du, dv>(seckey, cipher);
}
}

View File

@@ -1,61 +0,0 @@
#pragma once
#include "kem.hpp"
#include "utils.hpp"
#include <span>
// Kyber Key Encapsulation Mechanism (KEM) instantiated with Kyber512 parameters
namespace kyber512_kem {
// See row 1 of table 1 of specification @
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// These five values are forwarded as template arguments to the generic `kem::`
// routines: keygen receives (k, η1); encapsulate/ decapsulate receive all five.
constexpr size_t k = 2;
constexpr size_t η1 = 3;
constexpr size_t η2 = 2;
constexpr size_t du = 10;
constexpr size_t dv = 4;
// = 800 -bytes Kyber512 public key
constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(k);
// = 1632 -bytes Kyber512 secret key
constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(k);
// = 768 -bytes Kyber512 cipher text length
constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(k, du, dv);
// Computes a new Kyber512 KEM keypair s.t. public key is 800 -bytes and secret
// key is 1632 -bytes, given 32 -bytes seed d ( used in CPA-PKE ) and 32 -bytes
// seed z ( used in CCA-KEM ).
//
// Keypair is written into caller provided `pubkey` and `seckey` buffers.
inline void
keygen(std::span<const uint8_t, 32> d, std::span<const uint8_t, 32> z, std::span<uint8_t, PKEY_LEN> pubkey, std::span<uint8_t, SKEY_LEN> seckey)
{
kem::keygen<k, η1>(d, z, pubkey, seckey);
}
// Given 32 -bytes seed m ( which is used during encapsulation ) and a Kyber512
// KEM public key ( of 800 -bytes ), this routine computes a SHAKE256 XOF backed
// KDF (key derivation function) and 768 -bytes of cipher text, which can only
// be decrypted by corresponding Kyber512 KEM secret key, for arriving at same
// SHAKE256 XOF backed KDF.
//
// Cipher text is written into caller provided `cipher` buffer.
// Returned KDF can be used for deriving shared key of arbitrary bytes length.
inline shake256::shake256_t
encapsulate(std::span<const uint8_t, 32> m, std::span<const uint8_t, PKEY_LEN> pubkey, std::span<uint8_t, CIPHER_LEN> cipher)
{
return kem::encapsulate<k, η1, η2, du, dv>(m, pubkey, cipher);
}
// Given a Kyber512 KEM secret key ( of 1632 -bytes ) and a cipher text of 768
// -bytes, which holds encrypted ( using corresponding Kyber512 KEM public key )
// 32 -bytes seed, this routine computes a SHAKE256 XOF backed KDF (key
// derivation function).
//
// Returned KDF can be used for deriving shared key of arbitrary bytes length.
inline shake256::shake256_t
decapsulate(std::span<const uint8_t, SKEY_LEN> seckey, std::span<const uint8_t, CIPHER_LEN> cipher)
{
return kem::decapsulate<k, η1, η2, du, dv>(seckey, cipher);
}
}

View File

@@ -1,60 +0,0 @@
#pragma once
#include "kem.hpp"
#include "utils.hpp"
// Kyber Key Encapsulation Mechanism (KEM) instantiated with Kyber768 parameters.
//
// Every routine in this namespace is a thin, non-template wrapper, forwarding its arguments
// to the generic `kem::` implementation, with template parameters fixed to the constants
// declared below.
namespace kyber768_kem {
// See row 2 of table 1 of specification @
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// `k` is forwarded to all three KEM routines and also determines the public key,
// secret key and cipher text byte lengths computed below.
constexpr size_t k = 3;
// `η1` is forwarded to key generation, encapsulation and decapsulation.
constexpr size_t η1 = 2;
// `η2`, `du` and `dv` are forwarded only to encapsulation and decapsulation;
// `du` and `dv` additionally determine the cipher text byte length.
constexpr size_t η2 = 2;
constexpr size_t du = 10;
constexpr size_t dv = 4;
// = 1184 -bytes Kyber768 public key
constexpr size_t PKEY_LEN = kyber_utils::get_kem_public_key_len(k);
// = 2400 -bytes Kyber768 secret key
constexpr size_t SKEY_LEN = kyber_utils::get_kem_secret_key_len(k);
// = 1088 -bytes Kyber768 cipher text length
constexpr size_t CIPHER_LEN = kyber_utils::get_kem_cipher_len(k, du, dv);
// Computes a new Kyber768 KEM keypair s.t. public key is 1184 -bytes and secret
// key is 2400 -bytes, given 32 -bytes seed d ( used in CPA-PKE ) and 32 -bytes
// seed z ( used in CCA-KEM ).
//
// Both keys are written into the caller provided, fixed-length spans `pubkey` and `seckey`.
inline void
keygen(std::span<const uint8_t, 32> d, std::span<const uint8_t, 32> z, std::span<uint8_t, PKEY_LEN> pubkey, std::span<uint8_t, SKEY_LEN> seckey)
{
kem::keygen<k, η1>(d, z, pubkey, seckey);
}
// Given 32 -bytes seed m ( which is used during encapsulation ) and a Kyber768
// KEM public key ( of 1184 -bytes ), this routine computes a SHAKE256 XOF
// backed KDF (key derivation function) and 1088 -bytes of cipher text, which
// can only be decrypted by corresponding Kyber768 KEM secret key, for arriving
// at same SHAKE256 XOF backed KDF.
//
// Returned KDF can be used for deriving shared key of arbitrary bytes length.
// Cipher text is written into the caller provided span `cipher`.
inline shake256::shake256_t
encapsulate(std::span<const uint8_t, 32> m, std::span<const uint8_t, PKEY_LEN> pubkey, std::span<uint8_t, CIPHER_LEN> cipher)
{
return kem::encapsulate<k, η1, η2, du, dv>(m, pubkey, cipher);
}
// Given a Kyber768 KEM secret key ( of 2400 -bytes ) and a cipher text of 1088
// -bytes, which holds encrypted ( using corresponding Kyber768 KEM public key )
// 32 -bytes seed, this routine computes a SHAKE256 XOF backed KDF (key
// derivation function).
//
// Returned KDF can be used for deriving shared key of arbitrary bytes length.
inline shake256::shake256_t
decapsulate(std::span<const uint8_t, SKEY_LEN> seckey, std::span<const uint8_t, CIPHER_LEN> cipher)
{
return kem::decapsulate<k, η1, η2, du, dv>(seckey, cipher);
}
}

View File

@@ -0,0 +1,178 @@
#pragma once
#include "ml_kem/internals/math/field.hpp"
#include "ml_kem/internals/poly/poly_vec.hpp"
#include "ml_kem/internals/poly/sampling.hpp"
#include "ml_kem/internals/utility/params.hpp"
#include "ml_kem/internals/utility/utils.hpp"
#include "sha3_512.hpp"
// Public Key Encryption Scheme
namespace k_pke {
// K-PKE key generation algorithm, deterministically expanding a 32 -bytes input seed `d` into a byte serialized
// public key ( 12 -bit encoded polynomial vector, followed by the 32 -bytes matrix seed ) and a byte serialized
// secret key ( 12 -bit encoded polynomial vector ).
//
// See algorithm 12 of K-PKE specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t k, size_t eta1>
static inline constexpr void
keygen(std::span<const uint8_t, 32> d, std::span<uint8_t, k * 12 * 32 + 32> pubkey, std::span<uint8_t, k * 12 * 32> seckey)
  requires(ml_kem_params::check_keygen_params(k, eta1))
{
  // SHA3-512 digest of `d`: first 32 bytes seed the matrix, last 32 bytes seed both sampled vectors.
  std::array<uint8_t, 64> digest{};
  auto digest_span = std::span(digest);

  sha3_512::sha3_512_t hasher;
  hasher.absorb(d);
  hasher.finalize();
  hasher.digest(digest_span);

  const auto matrix_seed = digest_span.template first<32>();
  const auto noise_seed = digest_span.template last<32>();

  std::array<ml_kem_field::zq_t, k * k * ml_kem_ntt::N> mat_a{};
  ml_kem_utils::generate_matrix<k, false>(mat_a, matrix_seed);

  // Nonce is advanced by `k` after each sampled vector, hence `s` is sampled starting at 0 and `e` starting at `k`.
  uint8_t nonce = 0;

  std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec_s{};
  ml_kem_utils::generate_vector<k, eta1>(vec_s, noise_seed, nonce);
  nonce += k;

  std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec_e{};
  ml_kem_utils::generate_vector<k, eta1>(vec_e, noise_seed, nonce);
  nonce += k;

  ml_kem_utils::poly_vec_ntt<k>(vec_s);
  ml_kem_utils::poly_vec_ntt<k>(vec_e);

  // Matrix-vector product of `mat_a` and `vec_s`, then combined with `vec_e` ( all in NTT domain ).
  std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec_t{};
  ml_kem_utils::matrix_multiply<k, k, k, 1>(mat_a, vec_s, vec_t);
  ml_kem_utils::poly_vec_add_to<k>(vec_e, vec_t);

  // Public key = encoded `vec_t` || matrix seed. Secret key = encoded `vec_s`.
  constexpr size_t encoded_vec_len = k * 12 * 32;
  auto pubkey_vec = pubkey.template first<encoded_vec_len>();
  auto pubkey_seed = pubkey.template last<32>();

  ml_kem_utils::poly_vec_encode<k, 12>(vec_t, pubkey_vec);
  std::copy(matrix_seed.begin(), matrix_seed.end(), pubkey_seed.begin());
  ml_kem_utils::poly_vec_encode<k, 12>(vec_s, seckey);
}
// Given a *valid* K-PKE public key, 32 -bytes message ( to be encrypted ) and 32 -bytes random coin
// ( from where all randomness is deterministically sampled ), this routine encrypts message using
// K-PKE encryption algorithm, computing compressed cipher text.
//
// If modulus check, as described in point (2) of section 6.2 of ML-KEM draft standard, fails, it returns false.
// In that case this routine returns before anything is written to `enc`. Otherwise it returns true,
// after writing the whole cipher text to `enc`.
//
// See algorithm 13 of K-PKE specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
[[nodiscard("Use result of modulus check on public key")]] static inline constexpr bool
encrypt(std::span<const uint8_t, k * 12 * 32 + 32> pubkey,
std::span<const uint8_t, 32> msg,
std::span<const uint8_t, 32> rcoin,
std::span<uint8_t, k * du * 32 + dv * 32> enc)
requires(ml_kem_params::check_encrypt_params(k, eta1, eta2, du, dv))
{
// Public key layout: 12 -bit encoded polynomial vector ( `pkoff` bytes ), followed by 32 -bytes seed `rho`.
constexpr size_t pkoff = k * 12 * 32;
auto _pubkey0 = pubkey.template subspan<0, pkoff>();
auto rho = pubkey.template subspan<pkoff, 32>();
// Modulus check: decode the vector, encode it again, and compare with the received bytes.
// Any difference means the encoding wasn't canonical, so the public key is rejected.
std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> t_prime{};
std::array<uint8_t, _pubkey0.size()> encoded_tprime{};
ml_kem_utils::poly_vec_decode<k, 12>(_pubkey0, t_prime);
ml_kem_utils::poly_vec_encode<k, 12>(t_prime, encoded_tprime);
using encoded_pkey_t = std::span<const uint8_t, _pubkey0.size()>;
// A zero result from `ct_memcmp` is treated as "byte arrays differ".
const auto are_equal = ml_kem_utils::ct_memcmp(encoded_pkey_t(_pubkey0), encoded_pkey_t(encoded_tprime));
if (are_equal == 0u) {
// Got an invalid public key
return false;
}
// Matrix is regenerated from `rho`. Note the second template argument is `true` here, while key generation
// passes `false` -- presumably it selects the transposed matrix; confirm against `generate_matrix`.
std::array<ml_kem_field::zq_t, k * k * ml_kem_ntt::N> A_prime{};
ml_kem_utils::generate_matrix<k, true>(A_prime, rho);
// All three samplings below read from the same random coin; nonce `N` is advanced by `k` after each of
// the first two, so every sampling starts from a different nonce ( 0, k and 2k ).
uint8_t N = 0;
std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> r{};
ml_kem_utils::generate_vector<k, eta1>(r, rcoin, N);
N += k;
std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> e1{};
ml_kem_utils::generate_vector<k, eta2>(e1, rcoin, N);
N += k;
std::array<ml_kem_field::zq_t, ml_kem_ntt::N> e2{};
ml_kem_utils::generate_vector<1, eta2>(e2, rcoin, N);
// Only `r` is taken to NTT domain, as it's the only sampled vector which gets multiplied.
ml_kem_utils::poly_vec_ntt<k>(r);
// u: matrix-vector product with `r`, brought back from NTT domain, then combined with `e1`.
std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> u{};
ml_kem_utils::matrix_multiply<k, k, k, 1>(A_prime, r, u);
ml_kem_utils::poly_vec_intt<k>(u);
ml_kem_utils::poly_vec_add_to<k>(e1, u);
// v: product of decoded public key vector and `r`, brought back from NTT domain, then combined with `e2`.
std::array<ml_kem_field::zq_t, ml_kem_ntt::N> v{};
ml_kem_utils::matrix_multiply<1, k, k, 1>(t_prime, r, v);
ml_kem_utils::poly_vec_intt<1>(v);
ml_kem_utils::poly_vec_add_to<1>(e2, v);
// Message is decoded with 1 -bit per coefficient, decompressed, and then combined with `v`.
std::array<ml_kem_field::zq_t, ml_kem_ntt::N> m{};
ml_kem_utils::decode<1>(msg, m);
ml_kem_utils::poly_decompress<1>(m);
ml_kem_utils::poly_vec_add_to<1>(m, v);
// Cipher text layout: `du` -bit compressed and encoded `u` ( `encoff` bytes ), followed by `dv` -bit compressed and encoded `v`.
constexpr size_t encoff = k * du * 32;
auto _enc0 = enc.template subspan<0, encoff>();
auto _enc1 = enc.template subspan<encoff, dv * 32>();
ml_kem_utils::poly_vec_compress<k, du>(u);
ml_kem_utils::poly_vec_encode<k, du>(u, _enc0);
ml_kem_utils::poly_compress<dv>(v);
ml_kem_utils::encode<dv>(v, _enc1);
return true;
}
// K-PKE decryption algorithm. Given byte serialized K-PKE secret key and compressed cipher text, this routine
// writes the recovered 32 -bytes plain text ( which was encrypted using the associated K-PKE public key ) to `dec`.
//
// See algorithm 14 defined in K-PKE specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t k, size_t du, size_t dv>
static inline constexpr void
decrypt(std::span<const uint8_t, k * 12 * 32> seckey, std::span<const uint8_t, k * du * 32 + dv * 32> enc, std::span<uint8_t, 32> dec)
  requires(ml_kem_params::check_decrypt_params(k, du, dv))
{
  // Cipher text layout: encoded vector `u` ( k * du * 32 bytes ), followed by encoded polynomial `v` ( dv * 32 bytes ).
  constexpr size_t u_bytes = k * du * 32;
  constexpr size_t v_bytes = dv * 32;

  auto enc_u = enc.template first<u_bytes>();
  auto enc_v = enc.template last<v_bytes>();

  // Unpack vector `u` and take it to NTT domain, as it gets multiplied below.
  std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec_u{};
  ml_kem_utils::poly_vec_decode<k, du>(enc_u, vec_u);
  ml_kem_utils::poly_vec_decompress<k, du>(vec_u);
  ml_kem_utils::poly_vec_ntt<k>(vec_u);

  // Unpack polynomial `v`.
  std::array<ml_kem_field::zq_t, ml_kem_ntt::N> poly_v{};
  ml_kem_utils::decode<dv>(enc_v, poly_v);
  ml_kem_utils::poly_decompress<dv>(poly_v);

  // Secret key vector is used exactly as decoded, no NTT is applied on it here.
  std::array<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec_s{};
  ml_kem_utils::poly_vec_decode<k, 12>(seckey, vec_s);

  // Product of secret key vector and `u`, brought back from NTT domain.
  std::array<ml_kem_field::zq_t, ml_kem_ntt::N> product{};
  ml_kem_utils::matrix_multiply<1, k, k, 1>(vec_s, vec_u, product);
  ml_kem_utils::poly_vec_intt<1>(product);

  // `poly_v` is what gets compressed to 1 -bit per coefficient and serialized next, so the
  // subtraction presumably updates `poly_v` in-place -- confirm against `poly_vec_sub_from`.
  ml_kem_utils::poly_vec_sub_from<1>(product, poly_v);
  ml_kem_utils::poly_compress<1>(poly_v);
  ml_kem_utils::encode<1>(poly_v, dec);
}
}

View File

@@ -1,16 +1,15 @@
#pragma once
#include "prng.hpp"
#include "ml_kem/internals/rng/prng.hpp"
#include <bit>
#include <cstdint>
// Prime field arithmetic over Zq, for Kyber PQC Algorithm s.t. q = 3329
namespace field {
namespace ml_kem_field {
// Kyber Prime Field Modulus ( = 3329 )
constexpr uint32_t Q = (1u << 8) * 13 + 1;
// ML-KEM Prime Field Modulus ( = 3329 )
static constexpr uint32_t Q = (1u << 8) * 13 + 1;
// Bit width of Kyber Prime Field Modulus ( = 12 )
constexpr size_t RADIX_BIT_WIDTH = std::bit_width(Q);
// Bit width of ML-KEM Prime Field Modulus ( = 12 )
static constexpr size_t Q_BIT_WIDTH = std::bit_width(Q);
// Precomputed Barrett Reduction Constant
//
@@ -20,64 +19,72 @@ constexpr size_t RADIX_BIT_WIDTH = std::bit_width(Q);
// r = floor((1 << 2k) / Q) = 5039
//
// See https://www.nayuki.io/page/barrett-reduction-algorithm.
constexpr uint32_t R = (1u << (2 * RADIX_BIT_WIDTH)) / Q;
static constexpr uint32_t R = (1u << (2 * Q_BIT_WIDTH)) / Q;
// Prime field Zq | q = 3329, with arithmetic operations defined over it.
//
// This implementation collects inspiration from
// https://github.com/itzmeanjan/dilithium/blob/3fe6ab61d2d70c1a0b71fc6ed4449f64da08b020/include/field.hpp.
// Collects inspiration from https://github.com/itzmeanjan/dilithium/blob/3fe6ab61/include/field.hpp.
struct zq_t
{
public:
// Returns prime field element 0.
inline constexpr zq_t() = default;
private:
// Underlying value held in this type.
//
// Note, v is always kept in its canonical form i.e. v ∈ [0, Q).
uint32_t v = 0u;
// Constructs field element s.t. input is already reduced by prime modulo Q.
inline constexpr zq_t(const uint16_t a) { this->v = a; }
// Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, 2*Q), this routine can be invoked for reducing `v` modulo prime Q.
//
// Conditional subtraction of Q is expressed with mask arithmetic, instead of a branch on `v`.
static inline constexpr uint32_t reduce_once(const uint32_t v)
{
// For v ∈ [0, 2*Q), this subtraction wraps around ( setting bit 31 ) exactly when v < Q.
const uint32_t t0 = v - Q;
// All bits set, if subtraction wrapped around, otherwise zero.
const uint32_t t1 = -(t0 >> 31);
// Q, if subtraction wrapped around, otherwise zero.
const uint32_t t2 = Q & t1;
// Adds Q back only when subtraction wrapped around, so result is `v` for v < Q, otherwise `v - Q`.
const uint32_t t3 = t0 + t2;
return t3;
}
// Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, Q*Q), this routine can be invoked for reducing `v` modulo Q, using
// barrett reduction technique, following algorithm description @ https://www.nayuki.io/page/barrett-reduction-algorithm.
static inline constexpr uint32_t barrett_reduce(const uint32_t v)
{
// Product is computed with 64 -bit operands, as v * R doesn't necessarily fit in 32 -bits.
const uint64_t t0 = static_cast<uint64_t>(v) * static_cast<uint64_t>(R);
// Estimated quotient of v / Q i.e. (v * R) >> 2 * Q_BIT_WIDTH.
const uint32_t t1 = static_cast<uint32_t>(t0 >> (2 * Q_BIT_WIDTH));
const uint32_t t2 = t1 * Q;
// Remainder w.r.t. estimated quotient; the final `reduce_once` brings it to canonical range,
// which relies on this remainder being < 2*Q.
const uint32_t t = v - t2;
return reduce_once(t);
}
public:
// Constructor(s)
inline constexpr zq_t() = default;
inline constexpr zq_t(const uint16_t a /* Expects a ∈ [0, Q) */) { this->v = a; }
static inline constexpr zq_t from_non_reduced(const uint16_t a /* Doesn't expect that a ∈ [0, Q) */) { return barrett_reduce(a); }
// Returns canonical value held under Zq type. Returned value must ∈ [0, Q).
inline constexpr uint32_t raw() const { return this->v; }
// Returns prime field element 0.
static inline constexpr zq_t zero() { return zq_t(); }
// Returns prime field element 1.
static inline constexpr zq_t zero() { return zq_t(0u); }
static inline constexpr zq_t one() { return zq_t(1u); }
// Modulo addition of two Zq elements.
inline constexpr zq_t operator+(const zq_t rhs) const
{
const uint32_t t = this->v + rhs.v;
return zq_t(reduce_once(t));
}
// Compound modulo addition of two Zq elements.
inline constexpr void operator+=(const zq_t rhs) { *this = *this + rhs; }
inline constexpr zq_t operator+(const zq_t& rhs) const { return reduce_once(this->v + rhs.v); }
inline constexpr void operator+=(const zq_t& rhs) { *this = *this + rhs; }
// Modulo negation of a Zq element.
//
// Note, Q - v ∈ (0, Q] for canonical v, i.e. negating 0 would produce Q, which is not canonical. Passing
// the difference through `reduce_once` maps Q back to 0, keeping the result in [0, Q), without branching on `v`.
inline constexpr zq_t operator-() const { return zq_t(reduce_once(Q - this->v)); }
// Modulo subtraction of one Zq element from another one.
inline constexpr zq_t operator-(const zq_t rhs) const { return *this + (-rhs); }
// Compound modulo subtraction of two Zq elements.
inline constexpr void operator-=(const zq_t rhs) { *this = *this - rhs; }
inline constexpr zq_t operator-(const zq_t& rhs) const { return *this + (-rhs); }
inline constexpr void operator-=(const zq_t& rhs) { *this = *this - rhs; }
// Modulo multiplication of two Zq elements.
inline constexpr zq_t operator*(const zq_t rhs) const
{
auto res = zq_t();
res.v = barrett_reduce(this->v * rhs.v);
return res;
}
// Compound modulo multiplication of two Zq elements.
inline constexpr void operator*=(const zq_t rhs) { *this = *this * rhs; }
inline constexpr zq_t operator*(const zq_t& rhs) const { return barrett_reduce(this->v * rhs.v); }
inline constexpr void operator*=(const zq_t& rhs) { *this = *this * rhs; }
// Modulo exponentiation of Zq element.
//
// Taken from
// https://github.com/itzmeanjan/dilithium/blob/3fe6ab61d2d70c1a0b71fc6ed4449f64da08b020/include/field.hpp#L144-L167.
// Taken from https://github.com/itzmeanjan/dilithium/blob/3fe6ab61/include/field.hpp#L144-L167.
inline constexpr zq_t operator^(const size_t n) const
{
zq_t base = *this;
@@ -98,58 +105,23 @@ public:
return res;
}
// Multiplicative inverse of Zq element.
// Multiplicative inverse of Zq element. Also division of one Zq element by another one.
//
// Note, if Zq element is 0, we can't compute multiplicative inverse and 0 is
// returned.
// Note, if Zq element is 0, we can't compute multiplicative inverse and 0 is returned.
inline constexpr zq_t inv() const { return *this ^ static_cast<size_t>((Q - 2)); }
inline constexpr zq_t operator/(const zq_t& rhs) const { return *this * rhs.inv(); }
// Modulo division of two Zq elements.
//
// Note, if denominator is 0, returned result is 0 too, because we can't
// compute multiplicative inverse of 0.
inline constexpr zq_t operator/(const zq_t rhs) const { return *this * rhs.inv(); }
// Compare two Zq elements, returning truth value, in case they are same,
// otherwise returns false value.
inline constexpr bool operator==(const zq_t rhs) const { return this->v == rhs.v; }
// Comparison operators, see https://en.cppreference.com/w/cpp/language/default_comparisons
inline constexpr auto operator<=>(const zq_t&) const = default;
// Samples a random Zq element, using pseudo random number generator.
static inline zq_t random(prng::prng_t& prng)
template<size_t bit_security_level>
static inline zq_t random(ml_kem_prng::prng_t<bit_security_level>& prng)
{
uint16_t res = 0;
prng.read(std::span(reinterpret_cast<uint8_t*>(&res), sizeof(res)));
return zq_t(barrett_reduce(static_cast<uint32_t>(res)));
}
private:
// Underlying value held in this type.
//
// Note, v is always kept in its canonical form i.e. v ∈ [0, Q).
uint32_t v = 0u;
// Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, Q*Q), this routine can be invoked for reducing `v` modulo Q, using
// barrett reduction technique, following algorithm description @ https://www.nayuki.io/page/barrett-reduction-algorithm.
static inline constexpr uint32_t barrett_reduce(const uint32_t v)
{
const uint64_t t0 = static_cast<uint64_t>(v) * static_cast<uint64_t>(R);
const uint32_t t1 = static_cast<uint32_t>(t0 >> (2 * RADIX_BIT_WIDTH));
const uint32_t t2 = t1 * Q;
const uint32_t t = v - t2;
return reduce_once(t);
}
// Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, 2*Q), this routine can be invoked for reducing `v` modulo prime Q.
static inline constexpr uint32_t reduce_once(const uint32_t v)
{
const uint32_t t0 = v - Q;
const uint32_t t1 = -(t0 >> 31);
const uint32_t t2 = Q & t1;
const uint32_t t3 = t0 + t2;
return t3;
return zq_t::from_non_reduced(static_cast<uint32_t>(res));
}
};

View File

@@ -0,0 +1,156 @@
#pragma once
#include "k_pke.hpp"
#include "ml_kem/internals/utility/utils.hpp"
#include "sha3_256.hpp"
#include "sha3_512.hpp"
#include "shake256.hpp"
#include <algorithm>
// Key Encapsulation Mechanism
namespace ml_kem {
// ML-KEM key generation algorithm, generating byte serialized public key and secret key, given 32 -bytes seeds `d` and `z`.
//
// Secret key is laid out as: K-PKE secret key || public key || SHA3-256 digest of public key || seed `z`.
//
// See algorithm 15 defined in ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd
template<size_t k, size_t eta1>
static inline constexpr void
keygen(std::span<const uint8_t, 32> d, // used in CPA-PKE
       std::span<const uint8_t, 32> z, // used in CCA-KEM
       std::span<uint8_t, ml_kem_utils::get_kem_public_key_len(k)> pubkey,
       std::span<uint8_t, ml_kem_utils::get_kem_secret_key_len(k)> seckey)
  requires(ml_kem_params::check_keygen_params(k, eta1))
{
  // Byte lengths of four consecutive segments of secret key.
  constexpr size_t pke_sk_len = k * 12 * 32;
  constexpr size_t pk_len = pubkey.size();
  constexpr size_t pk_digest_len = 32;
  constexpr size_t z_off = pke_sk_len + pk_len + pk_digest_len;
  constexpr size_t z_len = seckey.size() - z_off;

  auto sk_pke = seckey.template subspan<0, pke_sk_len>();
  auto sk_pubkey = seckey.template subspan<pke_sk_len, pk_len>();
  auto sk_pk_digest = seckey.template subspan<pke_sk_len + pk_len, pk_digest_len>();
  auto sk_z = seckey.template subspan<z_off, z_len>();

  // K-PKE keypair is generated first; every other segment of secret key is derived from its public key or copied as is.
  k_pke::keygen<k, eta1>(d, pubkey, sk_pke);
  std::copy(pubkey.begin(), pubkey.end(), sk_pubkey.begin());

  sha3_256::sha3_256_t pk_hasher{};
  pk_hasher.absorb(pubkey);
  pk_hasher.finalize();
  pk_hasher.digest(sk_pk_digest);
  pk_hasher.reset();

  std::copy(z.begin(), z.end(), sk_z.begin());
}
// Given ML-KEM public key and 32 -bytes seed `m` ( used for deriving 32 -bytes shared secret & 32 -bytes random coin ),
// this routine computes ML-KEM cipher text, which can be shared with recipient party ( owning corresponding secret key )
// over insecure channel.
//
// It also computes a fixed length 32 -bytes shared secret, which can be used for fast symmetric key encryption between
// these two participating entities. Alternatively they might choose to derive longer keys from this shared secret.
// Other side of communication should also be able to generate same 32 -bytes shared secret, after successful
// decryption of cipher text.
//
// If invalid ML-KEM public key is input, this function execution will fail, returning false. In that case
// `shared_secret` is left untouched by this routine.
//
// See algorithm 16 defined in ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
[[nodiscard("Use result, it might fail because of malformed input public key")]] static inline constexpr bool
encapsulate(std::span<const uint8_t, 32> m,
            std::span<const uint8_t, ml_kem_utils::get_kem_public_key_len(k)> pubkey,
            std::span<uint8_t, ml_kem_utils::get_kem_cipher_text_len(k, du, dv)> cipher,
            std::span<uint8_t, 32> shared_secret)
  requires(ml_kem_params::check_encap_params(k, eta1, eta2, du, dv))
{
  // SHA3-512 input = m || SHA3-256(pubkey)
  std::array<uint8_t, m.size() + sha3_256::DIGEST_LEN> hash_input{};
  auto hash_input_span = std::span(hash_input);
  auto input_msg = hash_input_span.template first<m.size()>();
  auto input_pk_digest = hash_input_span.template last<sha3_256::DIGEST_LEN>();

  std::copy(m.begin(), m.end(), input_msg.begin());

  sha3_256::sha3_256_t pk_hasher{};
  pk_hasher.absorb(pubkey);
  pk_hasher.finalize();
  pk_hasher.digest(input_pk_digest);

  // SHA3-512 output = shared secret || random coin ( consumed by K-PKE encryption )
  std::array<uint8_t, sha3_512::DIGEST_LEN> hash_output{};
  auto hash_output_span = std::span(hash_output);
  auto secret_part = hash_output_span.template first<shared_secret.size()>();
  auto coin_part = hash_output_span.template last<sha3_512::DIGEST_LEN - shared_secret.size()>();

  sha3_512::sha3_512_t seed_hasher{};
  seed_hasher.absorb(hash_input_span);
  seed_hasher.finalize();
  seed_hasher.digest(hash_output_span);

  if (!k_pke::encrypt<k, eta1, eta2, du, dv>(pubkey, m, coin_part, cipher)) {
    // Got an invalid public key
    return false;
  }

  std::copy(secret_part.begin(), secret_part.end(), shared_secret.begin());
  return true;
}
// Given ML-KEM secret key and cipher text, this routine recovers 32 -bytes plain text which was encrypted by sender,
// using ML-KEM public key, associated with this secret key.
//
// Recovered 32 -bytes plain text is used for deriving a 32 -bytes shared secret key, which can now be
// used for encrypting communication between two participating parties, using fast symmetric key algorithms.
//
// Implicit rejection: recovered plain text is encrypted again and resulting cipher text is compared with the received one.
// If they differ, shared secret is instead set to 32 -bytes squeezed from SHAKE256, after absorbing `z || cipher`, where
// `z` is the 32 -bytes seed held at the end of secret key. Seed `z` itself must never be written to `shared_secret`.
//
// See algorithm 17 defined in ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
static inline constexpr void
decapsulate(std::span<const uint8_t, ml_kem_utils::get_kem_secret_key_len(k)> seckey,
            std::span<const uint8_t, ml_kem_utils::get_kem_cipher_text_len(k, du, dv)> cipher,
            std::span<uint8_t, 32> shared_secret)
  requires(ml_kem_params::check_decap_params(k, eta1, eta2, du, dv))
{
  constexpr size_t sklen = k * 12 * 32;
  constexpr size_t pklen = k * 12 * 32 + 32;
  constexpr size_t ctlen = cipher.size();

  constexpr size_t skoff0 = sklen;
  constexpr size_t skoff1 = skoff0 + pklen;
  constexpr size_t skoff2 = skoff1 + 32;

  // Secret key layout: K-PKE secret key || public key || 32 -bytes digest `h` || 32 -bytes seed `z`.
  auto pke_sk = seckey.template subspan<0, skoff0>();
  auto pubkey = seckey.template subspan<skoff0, skoff1 - skoff0>();
  auto h = seckey.template subspan<skoff1, skoff2 - skoff1>();
  auto z = seckey.template subspan<skoff2, seckey.size() - skoff2>();

  std::array<uint8_t, 32 + h.size()> g_in{};
  std::array<uint8_t, shared_secret.size() + 32> g_out{};
  std::array<uint8_t, shared_secret.size()> j_out{};
  std::array<uint8_t, cipher.size()> c_prime{};

  // SHA3-512 input = recovered plain text || h
  auto _g_in = std::span(g_in);
  auto _g_in0 = _g_in.template first<32>();
  auto _g_in1 = _g_in.template last<h.size()>();

  // SHA3-512 output = shared secret candidate || random coin for re-encryption
  auto _g_out = std::span(g_out);
  auto _g_out0 = _g_out.template first<shared_secret.size()>();
  auto _g_out1 = _g_out.template last<32>();

  k_pke::decrypt<k, du, dv>(pke_sk, cipher, _g_in0);
  std::copy(h.begin(), h.end(), _g_in1.begin());

  sha3_512::sha3_512_t h512{};
  h512.absorb(_g_in);
  h512.finalize();
  h512.digest(_g_out);

  // Rejection secret, computed unconditionally, so that work done doesn't depend on whether cipher text is valid.
  shake256::shake256_t xof256{};
  xof256.absorb(z);
  xof256.absorb(cipher);
  xof256.finalize();
  xof256.squeeze(j_out);

  // Explicitly ignore return value, because public key, held as part of secret key is *assumed* to be valid.
  (void)k_pke::encrypt<k, eta1, eta2, du, dv>(pubkey, _g_in0, _g_out1, c_prime);

  // line 9-12 of algorithm 17, in constant-time
  //
  // The last argument is the value used when cipher texts don't match. It must be the derived rejection
  // secret `j_out`, not the raw seed `z`, otherwise an invalid cipher text makes this routine output `z`.
  // NOTE(review): argument order of `ct_cond_memcpy` ( value-if-match, value-if-mismatch ) is taken from
  // the original call site -- confirm against its definition.
  using kdf_t = std::span<const uint8_t, shared_secret.size()>;
  const uint32_t cond = ml_kem_utils::ct_memcmp(cipher, std::span<const uint8_t, ctlen>(c_prime));
  ml_kem_utils::ct_cond_memcpy(cond, shared_secret, kdf_t(_g_out0), kdf_t(j_out));
}
}

View File

@@ -0,0 +1,70 @@
#pragma once
#include "ml_kem/internals/math/field.hpp"
#include "ml_kem/internals/poly/ntt.hpp"
#include "ml_kem/internals/utility/params.hpp"
#include <span>
namespace ml_kem_utils {
// Given an element x ∈ Z_q | q = 3329, this routine compresses it by discarding some low-order bits, computing y ∈ [0, 2^d) | d < round(log2(q)).
//
// Division by q is carried out with a multiply-and-shift estimate, followed by two mask based corrections of the quotient,
// so neither a division instruction nor a branch on `x` appears in this routine.
//
// See formula 4.5 on page 18 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
// Following implementation collects inspiration from https://github.com/FiloSottile/mlkem768/blob/cffbfb96/mlkem768.go#L395-L425.
template<size_t d>
static inline constexpr ml_kem_field::zq_t
compress(const ml_kem_field::zq_t x)
  requires(ml_kem_params::check_d(d))
{
  constexpr uint32_t q = ml_kem_field::Q;
  constexpr uint32_t half_q = q / 2;
  constexpr uint16_t low_d_bits = (1u << d) - 1;

  const uint32_t scaled = x.raw() << d;

  // Estimated quotient of scaled / q, along with remainder w.r.t. that estimate.
  const uint64_t wide = static_cast<uint64_t>(scaled) * ml_kem_field::R;
  const uint32_t quot = static_cast<uint32_t>(wide >> (ml_kem_field::Q_BIT_WIDTH * 2));
  const uint32_t rem = scaled - quot * q;

  // Each subtraction wraps around ( setting bit 31 ) when remainder is larger than the threshold,
  // in which case quotient is bumped by one.
  const uint32_t bump_half = ((half_q - rem) >> 31) & 1;
  const uint32_t bump_one_and_half = ((q + half_q - rem) >> 31) & 1;
  const uint32_t rounded = quot + bump_half + bump_one_and_half;

  return ml_kem_field::zq_t(static_cast<uint16_t>(rounded) & low_d_bits);
}
// Given an element x ∈ [0, 2^d) | d < round(log2(q)), this routine decompresses it back to y ∈ Z_q | q = 3329,
// computing (q * x + 2^(d-1)) >> d.
//
// See formula 4.6 on page 18 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t d>
static inline constexpr ml_kem_field::zq_t
decompress(const ml_kem_field::zq_t x)
  requires(ml_kem_params::check_d(d))
{
  // Half of the divisor 2^d, added before shifting, so that result is rounded instead of truncated.
  constexpr uint32_t rounding = (1u << d) >> 1;

  const uint32_t scaled = ml_kem_field::Q * x.raw();
  return ml_kem_field::zq_t(static_cast<uint16_t>((scaled + rounding) >> d));
}
// Compresses, in-place, each of 256 coefficients of a degree-255 polynomial, down to `d` -bits.
template<size_t d>
static inline constexpr void
poly_compress(std::span<ml_kem_field::zq_t, ml_kem_ntt::N> poly)
  requires(ml_kem_params::check_d(d))
{
  for (auto& coeff : poly) {
    coeff = compress<d>(coeff);
  }
}
// Decompresses, in-place, each of 256 `d` -bit coefficients of a degree-255 polynomial.
template<size_t d>
static inline constexpr void
poly_decompress(std::span<ml_kem_field::zq_t, ml_kem_ntt::N> poly)
  requires(ml_kem_params::check_d(d))
{
  for (auto& coeff : poly) {
    coeff = decompress<d>(coeff);
  }
}
}

View File

@@ -0,0 +1,193 @@
#pragma once
#include "ml_kem/internals/math/field.hpp"
namespace ml_kem_ntt {
// Base-2 logarithm of N.
static constexpr size_t LOG2N = 8;
// Number of coefficients in a polynomial ( = 256 ); every polynomial span in this file has this extent.
static constexpr size_t N = 1 << LOG2N;
// First primitive 256 -th root of unity modulo q | q = 3329
//
// Meaning, 17 ** 256 == 1 mod q
static constexpr auto ζ = ml_kem_field::zq_t(17);
// Multiplicative inverse of N/ 2 over Z_q | q = 3329 and N = 256
//
// Meaning (N/ 2) * INV_N = 1 mod q
//
// Every coefficient is multiplied by this constant, as the last step of inverse NTT.
static constexpr auto INV_N = ml_kem_field::zq_t(N / 2).inv();
// Given an unsigned integer `v`, this routine takes its `mbw` least significant bits and returns them
// in reversed order, as a `mbw` -bit wide number. Any higher bits of `v` are ignored.
//
// See https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L30-L38 for source of inspiration.
template<size_t mbw>
static inline constexpr size_t
bit_rev(const size_t v)
{
  size_t reversed = 0;
  size_t remaining = v;

  // Pop bits off the LSB side of input, pushing each onto the LSB side of output; after
  // `mbw` rounds, the bit popped first ends up at the most significant position.
  for (size_t round = 0; round < mbw; round++) {
    reversed = (reversed << 1) | (remaining & 1);
    remaining >>= 1;
  }

  return reversed;
}
// Compile-time computed constants ( powers of ζ ), used for polynomial evaluation i.e. computation of NTT form.
static constexpr std::array<ml_kem_field::zq_t, N / 2> NTT_ζ_EXP = []() -> auto {
std::array<ml_kem_field::zq_t, N / 2> res{};
for (size_t i = 0; i < res.size(); i++) {
res[i] = ζ ^ bit_rev<LOG2N - 1>(i);
}
return res;
}();
// Compile-time computed constants ( negated powers of ζ ), used for polynomial interpolation i.e. computation of iNTT form.
static constexpr std::array<ml_kem_field::zq_t, N / 2> INTT_ζ_EXP = []() -> auto {
std::array<ml_kem_field::zq_t, N / 2> res{};
for (size_t i = 0; i < res.size(); i++) {
res[i] = -NTT_ζ_EXP[i];
}
return res;
}();
// Compile-time computed constants ( powers of ζ ), used when multiplying two degree-255 polynomials in NTT domain.
static constexpr std::array<ml_kem_field::zq_t, N / 2> POLY_MUL_ζ_EXP = []() -> auto {
std::array<ml_kem_field::zq_t, N / 2> res{};
for (size_t i = 0; i < res.size(); i++) {
res[i] = ζ ^ ((bit_rev<LOG2N - 1>(i) << 1) ^ 1);
}
return res;
}();
// Given a polynomial f with 256 coefficients over F_q | q = 3329, this routine computes number theoretic transform
// using Cooley-Tukey algorithm, producing polynomial f' s.t. its coefficients are placed in bit-reversed order.
//
// Note, this routine mutates input i.e. it's an in-place NTT implementation.
//
// Implementation inspired from https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L69-L144.
// See algorithm 8 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
static inline constexpr void
ntt(std::span<ml_kem_field::zq_t, N> poly)
{
  // Index of next precomputed constant in NTT_ζ_EXP. Layers are visited from the widest butterflies
  // ( N/2 apart ) to the narrowest ( 2 apart ); each block of each layer consumes the next constant,
  // so indices 1..(N/2 - 1) are used, in increasing order.
  size_t twiddle_idx = 1;

  for (size_t half = N / 2; half >= 2; half /= 2) {
    for (size_t base = 0; base < N; base += 2 * half) {
      const ml_kem_field::zq_t twiddle = NTT_ζ_EXP[twiddle_idx];
      twiddle_idx++;

      for (size_t lo = base; lo < base + half; lo++) {
        const size_t hi = lo + half;
        const ml_kem_field::zq_t scaled = twiddle * poly[hi];

        // Upper half must be written first, as it still needs the old value of lower half.
        poly[hi] = poly[lo] - scaled;
        poly[lo] += scaled;
      }
    }
  }
}
// Given a polynomial f with 256 coefficients over F_q | q = 3329, s.t. its coefficients are placed in bit-reversed order,
// this routine computes inverse number theoretic transform using Gentleman-Sande algorithm, producing polynomial f' s.t.
// its coefficients are placed in standard order.
//
// Note, this routine mutates input i.e. it's an in-place iNTT implementation.
//
// Implementation inspired from https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L146-L224.
// See algorithm 9 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
static inline constexpr void
intt(std::span<ml_kem_field::zq_t, N> poly)
{
  // Index of next precomputed constant in INTT_ζ_EXP. Layers are visited from the narrowest butterflies
  // ( 2 apart ) to the widest ( N/2 apart ); each block of each layer consumes the next constant,
  // so indices (N/2 - 1)..1 are used, in decreasing order.
  size_t twiddle_idx = (N / 2) - 1;

  for (size_t half = 2; half < N; half *= 2) {
    for (size_t base = 0; base < N; base += 2 * half) {
      const ml_kem_field::zq_t neg_twiddle = INTT_ζ_EXP[twiddle_idx];
      twiddle_idx--;

      for (size_t lo = base; lo < base + half; lo++) {
        const size_t hi = lo + half;

        const ml_kem_field::zq_t lo_val = poly[lo];
        const ml_kem_field::zq_t hi_val = poly[hi];

        poly[lo] = lo_val + hi_val;
        poly[hi] = (lo_val - hi_val) * neg_twiddle;
      }
    }
  }

  // Final scaling of every coefficient by multiplicative inverse of N/ 2.
  for (auto& coeff : poly) {
    coeff *= INV_N;
  }
}
// Given two degree-1 polynomials f and g, this routine computes resulting degree-1 polynomial h s.t.
//
// h[0] = f[0] * g[0] + f[1] * g[1] * ζ
// h[1] = f[0] * g[1] + f[1] * g[0]
//
// See algorithm 11 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
static inline constexpr void
basemul(std::span<const ml_kem_field::zq_t, 2> f, std::span<const ml_kem_field::zq_t, 2> g, std::span<ml_kem_field::zq_t, 2> h, const ml_kem_field::zq_t ζ)
{
  // h[0] is stored before operands of h[1] are read, keeping same read/ write order as before.
  h[0] = ((f[1] * g[1]) * ζ) + (f[0] * g[0]);
  h[1] = (g[1] * f[0]) + (g[0] * f[1]);
}
// Given two degree-255 polynomials in NTT form, this routine performs 128
// base case multiplications for 128 pairs of degree-1 polynomials s.t.
//
// f = (f0ˆ + f1ˆX, f2ˆ + f3ˆX, ..., f254ˆ + f255ˆX)
// g = (g0ˆ + g1ˆX, g2ˆ + g3ˆX, ..., g254ˆ + g255ˆX)
//
// h = f ◦ g
//
// i -th pair of coefficients is multiplied using i -th constant of POLY_MUL_ζ_EXP.
//
// See algorithm 10 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
static inline constexpr void
polymul(std::span<const ml_kem_field::zq_t, N> f, std::span<const ml_kem_field::zq_t, N> g, std::span<ml_kem_field::zq_t, N> h)
{
  for (size_t off = 0; off < N; off += 2) {
    const auto f_pair = f.subspan(off).first<2>();
    const auto g_pair = g.subspan(off).first<2>();
    const auto h_pair = h.subspan(off).first<2>();

    basemul(f_pair, g_pair, h_pair, POLY_MUL_ζ_EXP[off / 2]);
  }
}
}

View File

@@ -0,0 +1,162 @@
#pragma once
#include "ml_kem/internals/math/field.hpp"
#include "ml_kem/internals/poly/compression.hpp"
#include "ml_kem/internals/poly/ntt.hpp"
#include "ml_kem/internals/poly/serialize.hpp"
#include "ml_kem/internals/utility/params.hpp"
namespace ml_kem_utils {
// Given two matrices ( in NTT domain ) of compatible dimension, where each matrix element is a degree-255 polynomial over Z_q | q = 3329,
// this routine multiplies them, computing a resulting matrix.
//
// Note, polynomial products are *added to* whatever `c` already holds, `c` is never cleared here. Hence
// caller must pass a zero-initialized `c`, to receive just the matrix product.
template<size_t a_rows, size_t a_cols, size_t b_rows, size_t b_cols>
static inline constexpr void
matrix_multiply(std::span<const ml_kem_field::zq_t, a_rows * a_cols * ml_kem_ntt::N> a,
                std::span<const ml_kem_field::zq_t, b_rows * b_cols * ml_kem_ntt::N> b,
                std::span<ml_kem_field::zq_t, a_rows * b_cols * ml_kem_ntt::N> c)
  requires(ml_kem_params::check_matrix_dim(a_cols, b_rows))
{
  using poly_t = std::span<const ml_kem_field::zq_t, ml_kem_ntt::N>;

  // Scratch space, holding product of one pair of polynomials at a time.
  std::array<ml_kem_field::zq_t, ml_kem_ntt::N> product{};
  auto product_span = std::span(product);

  for (size_t row = 0; row < a_rows; row++) {
    for (size_t col = 0; col < b_cols; col++) {
      auto out_poly = c.subspan((row * b_cols + col) * ml_kem_ntt::N, ml_kem_ntt::N);

      for (size_t inner = 0; inner < a_cols; inner++) {
        const auto lhs = a.subspan((row * a_cols + inner) * ml_kem_ntt::N, ml_kem_ntt::N);
        const auto rhs = b.subspan((inner * b_cols + col) * ml_kem_ntt::N, ml_kem_ntt::N);

        ml_kem_ntt::polymul(poly_t(lhs), poly_t(rhs), product_span);

        for (size_t coeff = 0; coeff < ml_kem_ntt::N; coeff++) {
          out_poly[coeff] += product[coeff];
        }
      }
    }
  }
}
// Runs `ml_kem_ntt::ntt` in-place on each of the `k` polynomials ( ml_kem_ntt::N coefficients each ) stored
// back-to-back in `vec`, i.e. on a `k x 1` polynomial vector whose coefficients are in non-NTT form.
template<size_t k>
static inline constexpr void
poly_vec_ntt(std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec)
  requires((k == 1) || ml_kem_params::check_k(k))
{
  using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;
  constexpr size_t coeff_cnt = k * ml_kem_ntt::N;

  // Walk the vector one polynomial at a time.
  for (size_t begin = 0; begin < coeff_cnt; begin += ml_kem_ntt::N) {
    const auto poly = poly_t(vec.subspan(begin, ml_kem_ntt::N));
    ml_kem_ntt::ntt(poly);
  }
}
// Runs `ml_kem_ntt::intt` in-place on each of the `k` polynomials ( ml_kem_ntt::N coefficients each ) stored
// back-to-back in `vec`, i.e. on a `k x 1` polynomial vector whose coefficients are in NTT form
// ( placed in bit-reversed order ).
template<size_t k>
static inline constexpr void
poly_vec_intt(std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec)
  requires((k == 1) || ml_kem_params::check_k(k))
{
  using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;
  constexpr size_t coeff_cnt = k * ml_kem_ntt::N;

  // Walk the vector one polynomial at a time.
  for (size_t begin = 0; begin < coeff_cnt; begin += ml_kem_ntt::N) {
    const auto poly = poly_t(vec.subspan(begin, ml_kem_ntt::N));
    ml_kem_ntt::intt(poly);
  }
}
// Coefficient-wise addition of two `k x 1` polynomial vectors: every coefficient of `src` is added onto the
// coefficient at the same position of `dst` ( dst += src ). `src` is left untouched.
template<size_t k>
static inline constexpr void
poly_vec_add_to(std::span<const ml_kem_field::zq_t, k * ml_kem_ntt::N> src, std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> dst)
  requires((k == 1) || ml_kem_params::check_k(k))
{
  // Both spans have the same static extent, so the two iterators advance in lock-step.
  auto addend = src.begin();
  for (auto& sum : dst) {
    sum += *addend;
    ++addend;
  }
}
// Coefficient-wise subtraction of two `k x 1` polynomial vectors: every coefficient of `src` is subtracted from
// the coefficient at the same position of `dst` ( dst -= src ). `src` is left untouched.
template<size_t k>
static inline constexpr void
poly_vec_sub_from(std::span<const ml_kem_field::zq_t, k * ml_kem_ntt::N> src, std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> dst)
  requires((k == 1) || ml_kem_params::check_k(k))
{
  // Both spans have the same static extent, so the two iterators advance in lock-step.
  auto subtrahend = src.begin();
  for (auto& diff : dst) {
    diff -= *subtrahend;
    ++subtrahend;
  }
}
// Serializes a `k x 1` vector of degree-255 polynomials. The i-th polynomial is passed to `encode<l>` together
// with the i-th `32 x l` -bytes slice of `dst`, so `dst` ends up holding `k` encodings back-to-back
// ( `k x 32 x l` -bytes in total ).
template<size_t k, size_t l>
static inline constexpr void
poly_vec_encode(std::span<const ml_kem_field::zq_t, k * ml_kem_ntt::N> src, std::span<uint8_t, k * 32 * l> dst)
  requires(ml_kem_params::check_k(k))
{
  constexpr size_t encoded_len = 32 * l;

  using poly_t = std::span<const ml_kem_field::zq_t, ml_kem_ntt::N>;
  using serialized_t = std::span<uint8_t, encoded_len>;

  for (size_t idx = 0; idx < k; idx++) {
    const auto poly = poly_t(src.subspan(idx * ml_kem_ntt::N, ml_kem_ntt::N));
    const auto bytes = serialized_t(dst.subspan(idx * encoded_len, encoded_len));

    ml_kem_utils::encode<l>(poly, bytes);
  }
}
// Deserializes `k x 32 x l` -bytes into a `k x 1` column vector of degree-255 polynomials. The i-th `32 x l` -bytes
// slice of `src` is passed to `decode<l>` together with the i-th polynomial of `dst`.
template<size_t k, size_t l>
static inline constexpr void
poly_vec_decode(std::span<const uint8_t, k * 32 * l> src, std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> dst)
  requires(ml_kem_params::check_k(k))
{
  constexpr size_t encoded_len = 32 * l;

  using serialized_t = std::span<const uint8_t, encoded_len>;
  using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;

  for (size_t idx = 0; idx < k; idx++) {
    const auto bytes = serialized_t(src.subspan(idx * encoded_len, encoded_len));
    const auto poly = poly_t(dst.subspan(idx * ml_kem_ntt::N, ml_kem_ntt::N));

    ml_kem_utils::decode<l>(bytes, poly);
  }
}
// Applies `poly_compress<d>` to each of the `k` polynomials of a `k x 1` polynomial vector, so that all
// k * 256 coefficients are compressed. The input vector itself is overwritten.
template<size_t k, size_t d>
static inline constexpr void
poly_vec_compress(std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec)
  requires(ml_kem_params::check_k(k))
{
  using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;

  for (size_t idx = 0; idx < k; idx++) {
    const auto poly = poly_t(vec.subspan(idx * ml_kem_ntt::N, ml_kem_ntt::N));
    ml_kem_utils::poly_compress<d>(poly);
  }
}
// Applies `poly_decompress<d>` to each of the `k` polynomials of a `k x 1` polynomial vector, so that all
// k * 256 coefficients are decompressed. The input vector itself is overwritten.
template<size_t k, size_t d>
static inline constexpr void
poly_vec_decompress(std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec)
  requires(ml_kem_params::check_k(k))
{
  using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;

  for (size_t idx = 0; idx < k; idx++) {
    const auto poly = poly_t(vec.subspan(idx * ml_kem_ntt::N, ml_kem_ntt::N));
    ml_kem_utils::poly_decompress<d>(poly);
  }
}
}

View File

@@ -0,0 +1,158 @@
#pragma once
#include "ml_kem/internals/math/field.hpp"
#include "ml_kem/internals/poly/ntt.hpp"
#include "ml_kem/internals/utility/params.hpp"
#include "shake128.hpp"
#include "shake256.hpp"
#include <limits>
namespace ml_kem_utils {
// Uniform ( rejection ) sampling in R_q | q = 3329.
//
// Bytes are squeezed from an already finalized SHAKE128 state, one rate-sized block at a time. Every 3 bytes
// yield two 12 -bit candidates; a candidate is accepted as the next coefficient only if it is < Q. Sampling
// stops as soon as all ml_kem_ntt::N coefficients of `poly` are filled, so the result is a *deterministic*
// function of the byte stream. If the byte stream is statistically close to a uniform random byte stream,
// produced coefficients are also statistically close to randomly sampled elements of R_q.
//
// See algorithm 6 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
inline constexpr void
sample_ntt(shake128::shake128_t& hasher, std::span<ml_kem_field::zq_t, ml_kem_ntt::N> poly)
{
  constexpr size_t coeff_cnt = poly.size();

  // One full SHAKE128 rate worth of bytes.
  std::array<uint8_t, shake128::RATE / std::numeric_limits<uint8_t>::digits> block{};

  size_t filled = 0;
  while (filled < coeff_cnt) {
    hasher.squeeze(block);

    for (size_t at = 0; (at < block.size()) && (filled < coeff_cnt); at += 3) {
      const uint8_t b0 = block[at + 0];
      const uint8_t b1 = block[at + 1];
      const uint8_t b2 = block[at + 2];

      // First candidate : b0 and low nibble of b1. Second candidate : high nibble of b1 and b2.
      const uint16_t first = (static_cast<uint16_t>(b1 & 0x0f) << 8) | static_cast<uint16_t>(b0);
      const uint16_t second = (static_cast<uint16_t>(b2) << 4) | (static_cast<uint16_t>(b1 >> 4));

      if (first < ml_kem_field::Q) {
        poly[filled] = ml_kem_field::zq_t(first);
        filled++;
      }

      // The first candidate may just have filled the last slot, hence the extra bound check.
      if ((second < ml_kem_field::Q) && (filled < coeff_cnt)) {
        poly[filled] = ml_kem_field::zq_t(second);
        filled++;
      }
    }
  }
}
// Fills the row-major `k x k` matrix `mat` with polynomials in NTT domain. Element (i, j) is sampled with
// `sample_ntt` from a fresh SHAKE128 instance which absorbed the 32 -bytes `rho` followed by two index bytes:
//
// - ( j, i ) when `transpose` is false
// - ( i, j ) when `transpose` is true
//
// See step (4-8) of algorithm 12/ 13 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t k, bool transpose>
static inline constexpr void
generate_matrix(std::span<ml_kem_field::zq_t, k * k * ml_kem_ntt::N> mat, std::span<const uint8_t, 32> rho)
  requires(ml_kem_params::check_k(k))
{
  using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;

  // rho || index byte || index byte ; only the two trailing bytes change between matrix elements.
  std::array<uint8_t, rho.size() + 2> xof_in{};
  std::copy(rho.begin(), rho.end(), xof_in.begin());

  for (size_t row = 0; row < k; row++) {
    for (size_t col = 0; col < k; col++) {
      xof_in[32] = static_cast<uint8_t>(transpose ? row : col);
      xof_in[33] = static_cast<uint8_t>(transpose ? col : row);

      shake128::shake128_t hasher;
      hasher.absorb(xof_in);
      hasher.finalize();

      const size_t begin = (row * k + col) * ml_kem_ntt::N;
      sample_ntt(hasher, poly_t(mat.subspan(begin, ml_kem_ntt::N)));
    }
  }
}
// Centered Binomial Distribution.
//
// Deterministically turns `64 * eta` -bytes of pseudorandom function ( PRF ) output into a degree 255 polynomial.
// Each coefficient is the difference of two `eta` -bit sums : ( sum of `eta` consecutive input bits ) minus
// ( sum of the next `eta` input bits ), computed in Z_q.
//
// See algorithm 7 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t eta>
static inline constexpr void
sample_poly_cbd(std::span<const uint8_t, 64 * eta> prf, std::span<ml_kem_field::zq_t, ml_kem_ntt::N> poly)
  requires(ml_kem_params::check_eta(eta))
{
  if constexpr (eta == 2) {
    static_assert(eta == 2, "η must be 2 !");

    constexpr uint8_t every_2nd_bit = 0b01010101;
    constexpr uint8_t low2 = 0b11;

    // One input byte ( 4 x 2 bits ) produces two coefficients.
    for (size_t in = 0; in < prf.size(); in++) {
      const uint8_t byte = prf[in];

      // Four 2 -bit fields, each holding the sum of one pair of adjacent input bits.
      const uint8_t even_bits = (byte >> 0) & every_2nd_bit;
      const uint8_t odd_bits = (byte >> 1) & every_2nd_bit;
      const uint8_t pair_sums = even_bits + odd_bits;

      const size_t out = in * 2;
      poly[out + 0] = ml_kem_field::zq_t((pair_sums >> 0) & low2) - ml_kem_field::zq_t((pair_sums >> 2) & low2);
      poly[out + 1] = ml_kem_field::zq_t((pair_sums >> 4) & low2) - ml_kem_field::zq_t((pair_sums >> 6) & low2);
    }
  } else {
    static_assert(eta == 3, "η must be 3 !");

    constexpr uint32_t every_3rd_bit = 0b001001001001001001001001u;
    constexpr uint32_t low3 = 0b111u;

    // Three input bytes ( 8 x 3 bits ) produce four coefficients.
    size_t out = 0;
    for (size_t in = 0; in < prf.size(); in += 3) {
      const uint32_t word = (static_cast<uint32_t>(prf[in + 2]) << 16) | (static_cast<uint32_t>(prf[in + 1]) << 8) | static_cast<uint32_t>(prf[in + 0]);

      // Eight 3 -bit fields, each holding the sum of three adjacent input bits.
      const uint32_t triple_sums = ((word >> 0) & every_3rd_bit) + ((word >> 1) & every_3rd_bit) + ((word >> 2) & every_3rd_bit);

      for (size_t c = 0; c < 4; c++) {
        const size_t shift = c * 6;
        poly[out + c] = ml_kem_field::zq_t((triple_sums >> shift) & low3) - ml_kem_field::zq_t((triple_sums >> (shift + 3)) & low3);
      }

      out += 4;
    }
  }
}
// Samples a `k x 1` polynomial vector from Bη. For the i-th polynomial, SHAKE256 absorbs the 32 -bytes `sigma`
// followed by the single byte `nonce + i`, `64 * eta` -bytes are squeezed and handed over to `sample_poly_cbd<eta>`.
//
// Follows step (9-12) of algorithm 12/ 13 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t k, size_t eta>
static inline constexpr void
generate_vector(std::span<ml_kem_field::zq_t, k * ml_kem_ntt::N> vec, std::span<const uint8_t, 32> sigma, const uint8_t nonce)
  requires((k == 1) || ml_kem_params::check_k(k))
{
  using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;

  // sigma || nonce byte ; only the trailing byte changes between polynomials.
  std::array<uint8_t, sigma.size() + 1> prf_in{};
  std::copy(sigma.begin(), sigma.end(), prf_in.begin());

  std::array<uint8_t, 64 * eta> prf_out{};

  for (size_t idx = 0; idx < k; idx++) {
    // Wraps around modulo 256, same as plain uint8_t arithmetic.
    prf_in[32] = static_cast<uint8_t>(nonce + idx);

    shake256::shake256_t hasher;
    hasher.absorb(prf_in);
    hasher.finalize();
    hasher.squeeze(prf_out);

    const auto poly = poly_t(vec.subspan(idx * ml_kem_ntt::N, ml_kem_ntt::N));
    ml_kem_utils::sample_poly_cbd<eta>(prf_out, poly);
  }
}
}

View File

@@ -1,27 +1,23 @@
#pragma once
#include "field.hpp"
#include "ntt.hpp"
#include "params.hpp"
#include <cstring>
#include "ml_kem/internals/math/field.hpp"
#include "ml_kem/internals/poly/ntt.hpp"
#include "ml_kem/internals/utility/params.hpp"
// IND-CPA-secure Public Key Encryption Scheme Utilities
namespace kyber_utils {
namespace ml_kem_utils {
// Given a degree-255 polynomial, where significant portion of each ( total 256
// of them ) coefficient ∈ [0, 2^l), this routine serializes the polynomial to a
// byte array of length 32 * l -bytes
// Given a degree-255 polynomial, where significant portion of each ( total 256 of them ) coefficient ∈ [0, 2^l),
// this routine serializes the polynomial to a byte array of length 32 * l -bytes.
//
// See algorithm 3 described in section 1.1 ( page 7 ) of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See algorithm 4 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t l>
static inline void
encode(std::span<const field::zq_t, ntt::N> poly, std::span<uint8_t, 32 * l> arr)
requires(kyber_params::check_l(l))
static inline constexpr void
encode(std::span<const ml_kem_field::zq_t, ml_kem_ntt::N> poly, std::span<uint8_t, 32 * l> arr)
requires(ml_kem_params::check_l(l))
{
std::fill(arr.begin(), arr.end(), 0);
if constexpr (l == 1) {
constexpr size_t itr_cnt = ntt::N >> 3;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 3;
constexpr uint32_t one = 0b1u;
for (size_t i = 0; i < itr_cnt; i++) {
@@ -32,7 +28,7 @@ encode(std::span<const field::zq_t, ntt::N> poly, std::span<uint8_t, 32 * l> arr
(static_cast<uint8_t>(poly[off + 1].raw() & one) << 1) | (static_cast<uint8_t>(poly[off + 0].raw() & one) << 0);
}
} else if constexpr (l == 4) {
constexpr size_t itr_cnt = ntt::N >> 1;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 1;
constexpr uint32_t msk = 0b1111u;
for (size_t i = 0; i < itr_cnt; i++) {
@@ -40,7 +36,7 @@ encode(std::span<const field::zq_t, ntt::N> poly, std::span<uint8_t, 32 * l> arr
arr[i] = (static_cast<uint8_t>(poly[off + 1].raw() & msk) << 4) | static_cast<uint8_t>(poly[off + 0].raw() & msk);
}
} else if constexpr (l == 5) {
constexpr size_t itr_cnt = ntt::N >> 3;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 3;
constexpr uint32_t mask5 = 0b11111u;
constexpr uint32_t mask4 = 0b1111u;
constexpr uint32_t mask3 = 0b111u;
@@ -67,7 +63,7 @@ encode(std::span<const field::zq_t, ntt::N> poly, std::span<uint8_t, 32 * l> arr
arr[boff + 4] = (static_cast<uint8_t>(t7 & mask5) << 3) | static_cast<uint8_t>((t6 >> 2) & mask3);
}
} else if constexpr (l == 10) {
constexpr size_t itr_cnt = ntt::N >> 2;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 2;
constexpr uint32_t mask6 = 0b111111u;
constexpr uint32_t mask4 = 0b1111u;
constexpr uint32_t mask2 = 0b11u;
@@ -88,7 +84,7 @@ encode(std::span<const field::zq_t, ntt::N> poly, std::span<uint8_t, 32 * l> arr
arr[boff + 4] = static_cast<uint8_t>(t3 >> 2);
}
} else if constexpr (l == 11) {
constexpr size_t itr_cnt = ntt::N >> 3;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 3;
constexpr uint32_t mask8 = 0b11111111u;
constexpr uint32_t mask7 = 0b1111111u;
constexpr uint32_t mask6 = 0b111111u;
@@ -126,7 +122,7 @@ encode(std::span<const field::zq_t, ntt::N> poly, std::span<uint8_t, 32 * l> arr
} else {
static_assert(l == 12, "l must be equal to 12 !");
constexpr size_t itr_cnt = ntt::N >> 1;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 1;
constexpr uint32_t mask4 = 0b1111u;
for (size_t i = 0; i < itr_cnt; i++) {
@@ -143,47 +139,45 @@ encode(std::span<const field::zq_t, ntt::N> poly, std::span<uint8_t, 32 * l> arr
}
}
// Given a byte array of length 32 * l -bytes this routine deserializes it to a
// polynomial of degree 255 s.t. significant portion of each ( total 256 of them
// ) coefficient ∈ [0, 2^l)
// Given a byte array of length 32 * l -bytes this routine deserializes it to a polynomial of degree 255 s.t. significant
// portion of each ( total 256 of them ) coefficient ∈ [0, 2^l).
//
// See algorithm 3 described in section 1.1 ( page 7 ) of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See algorithm 5 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
template<size_t l>
static inline void
decode(std::span<const uint8_t, 32 * l> arr, std::span<field::zq_t, ntt::N> poly)
requires(kyber_params::check_l(l))
static inline constexpr void
decode(std::span<const uint8_t, 32 * l> arr, std::span<ml_kem_field::zq_t, ml_kem_ntt::N> poly)
requires(ml_kem_params::check_l(l))
{
if constexpr (l == 1) {
constexpr size_t itr_cnt = ntt::N >> 3;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 3;
constexpr uint8_t one = 0b1;
for (size_t i = 0; i < itr_cnt; i++) {
const size_t off = i << 3;
const uint8_t byte = arr[i];
poly[off + 0] = field::zq_t((byte >> 0) & one);
poly[off + 1] = field::zq_t((byte >> 1) & one);
poly[off + 2] = field::zq_t((byte >> 2) & one);
poly[off + 3] = field::zq_t((byte >> 3) & one);
poly[off + 4] = field::zq_t((byte >> 4) & one);
poly[off + 5] = field::zq_t((byte >> 5) & one);
poly[off + 6] = field::zq_t((byte >> 6) & one);
poly[off + 7] = field::zq_t((byte >> 7) & one);
poly[off + 0] = ml_kem_field::zq_t((byte >> 0) & one);
poly[off + 1] = ml_kem_field::zq_t((byte >> 1) & one);
poly[off + 2] = ml_kem_field::zq_t((byte >> 2) & one);
poly[off + 3] = ml_kem_field::zq_t((byte >> 3) & one);
poly[off + 4] = ml_kem_field::zq_t((byte >> 4) & one);
poly[off + 5] = ml_kem_field::zq_t((byte >> 5) & one);
poly[off + 6] = ml_kem_field::zq_t((byte >> 6) & one);
poly[off + 7] = ml_kem_field::zq_t((byte >> 7) & one);
}
} else if constexpr (l == 4) {
constexpr size_t itr_cnt = ntt::N >> 1;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 1;
constexpr uint8_t mask = 0b1111;
for (size_t i = 0; i < itr_cnt; i++) {
const size_t off = i << 1;
const uint8_t byte = arr[i];
poly[off + 0] = field::zq_t((byte >> 0) & mask);
poly[off + 1] = field::zq_t((byte >> 4) & mask);
poly[off + 0] = ml_kem_field::zq_t((byte >> 0) & mask);
poly[off + 1] = ml_kem_field::zq_t((byte >> 4) & mask);
}
} else if constexpr (l == 5) {
constexpr size_t itr_cnt = ntt::N >> 3;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 3;
constexpr uint8_t mask5 = 0b11111;
constexpr uint8_t mask4 = 0b1111;
constexpr uint8_t mask3 = 0b111;
@@ -203,17 +197,17 @@ decode(std::span<const uint8_t, 32 * l> arr, std::span<field::zq_t, ntt::N> poly
const auto t6 = static_cast<uint16_t>((arr[boff + 4] & mask3) << 2) | static_cast<uint16_t>((arr[boff + 3] >> 6) & mask2);
const auto t7 = static_cast<uint16_t>((arr[boff + 4] >> 3) & mask5);
poly[poff + 0] = field::zq_t(t0);
poly[poff + 1] = field::zq_t(t1);
poly[poff + 2] = field::zq_t(t2);
poly[poff + 3] = field::zq_t(t3);
poly[poff + 4] = field::zq_t(t4);
poly[poff + 5] = field::zq_t(t5);
poly[poff + 6] = field::zq_t(t6);
poly[poff + 7] = field::zq_t(t7);
poly[poff + 0] = ml_kem_field::zq_t(t0);
poly[poff + 1] = ml_kem_field::zq_t(t1);
poly[poff + 2] = ml_kem_field::zq_t(t2);
poly[poff + 3] = ml_kem_field::zq_t(t3);
poly[poff + 4] = ml_kem_field::zq_t(t4);
poly[poff + 5] = ml_kem_field::zq_t(t5);
poly[poff + 6] = ml_kem_field::zq_t(t6);
poly[poff + 7] = ml_kem_field::zq_t(t7);
}
} else if constexpr (l == 10) {
constexpr size_t itr_cnt = ntt::N >> 2;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 2;
constexpr uint8_t mask6 = 0b111111;
constexpr uint8_t mask4 = 0b1111;
constexpr uint8_t mask2 = 0b11;
@@ -227,13 +221,13 @@ decode(std::span<const uint8_t, 32 * l> arr, std::span<field::zq_t, ntt::N> poly
const auto t2 = (static_cast<uint16_t>(arr[boff + 3] & mask6) << 4) | static_cast<uint16_t>(arr[boff + 2] >> 4);
const auto t3 = (static_cast<uint16_t>(arr[boff + 4]) << 2) | static_cast<uint16_t>(arr[boff + 3] >> 6);
poly[poff + 0] = field::zq_t(t0);
poly[poff + 1] = field::zq_t(t1);
poly[poff + 2] = field::zq_t(t2);
poly[poff + 3] = field::zq_t(t3);
poly[poff + 0] = ml_kem_field::zq_t(t0);
poly[poff + 1] = ml_kem_field::zq_t(t1);
poly[poff + 2] = ml_kem_field::zq_t(t2);
poly[poff + 3] = ml_kem_field::zq_t(t3);
}
} else if constexpr (l == 11) {
constexpr size_t itr_cnt = ntt::N >> 3;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 3;
constexpr uint8_t mask7 = 0b1111111;
constexpr uint8_t mask6 = 0b111111;
constexpr uint8_t mask5 = 0b11111;
@@ -255,19 +249,19 @@ decode(std::span<const uint8_t, 32 * l> arr, std::span<field::zq_t, ntt::N> poly
const auto t6 = (static_cast<uint16_t>(arr[boff + 9] & mask5) << 6) | static_cast<uint16_t>(arr[boff + 8] >> 2);
const auto t7 = (static_cast<uint16_t>(arr[boff + 10]) << 3) | static_cast<uint16_t>(arr[boff + 9] >> 5);
poly[poff + 0] = field::zq_t(t0);
poly[poff + 1] = field::zq_t(t1);
poly[poff + 2] = field::zq_t(t2);
poly[poff + 3] = field::zq_t(t3);
poly[poff + 4] = field::zq_t(t4);
poly[poff + 5] = field::zq_t(t5);
poly[poff + 6] = field::zq_t(t6);
poly[poff + 7] = field::zq_t(t7);
poly[poff + 0] = ml_kem_field::zq_t(t0);
poly[poff + 1] = ml_kem_field::zq_t(t1);
poly[poff + 2] = ml_kem_field::zq_t(t2);
poly[poff + 3] = ml_kem_field::zq_t(t3);
poly[poff + 4] = ml_kem_field::zq_t(t4);
poly[poff + 5] = ml_kem_field::zq_t(t5);
poly[poff + 6] = ml_kem_field::zq_t(t6);
poly[poff + 7] = ml_kem_field::zq_t(t7);
}
} else {
static_assert(l == 12, "l must be equal to 12 !");
constexpr size_t itr_cnt = ntt::N >> 1;
constexpr size_t itr_cnt = ml_kem_ntt::N >> 1;
constexpr uint8_t mask4 = 0b1111;
for (size_t i = 0; i < itr_cnt; i++) {
@@ -277,8 +271,9 @@ decode(std::span<const uint8_t, 32 * l> arr, std::span<field::zq_t, ntt::N> poly
const auto t0 = (static_cast<uint16_t>(arr[boff + 1] & mask4) << 8) | static_cast<uint16_t>(arr[boff + 0]);
const auto t1 = (static_cast<uint16_t>(arr[boff + 2]) << 4) | static_cast<uint16_t>(arr[boff + 1] >> 4);
poly[poff + 0] = field::zq_t(t0);
poly[poff + 1] = field::zq_t(t1);
// Read line (786-792) of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
poly[poff + 0] = ml_kem_field::zq_t::from_non_reduced(t0);
poly[poff + 1] = ml_kem_field::zq_t::from_non_reduced(t1);
}
}
}

View File

@@ -0,0 +1,58 @@
#pragma once
#include "shake256.hpp"
#include <limits>
#include <random>
namespace ml_kem_prng {
// Pseudo Random Number Generator s.t. N (>0) -many random bytes are read from SHAKE256 Xof state, arbitrary many times s.t. SHAKE256 state is obtained by
//
// - either hashing `bit_security_level / 8` -bytes sampled using std::random_device ( default )
// - or hashing `bit_security_level / 8` -bytes supplied as argument ( explicit )
//
// Note, std::random_device's behaviour is implementation defined feature, so this PRNG implementation doesn't guarantee that
// it'll generate cryptographic secure random bytes if you opt for using default constructor of this struct.
//
// I strongly suggest you read https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device before using default constructor.
// When using explicit constructor, it's your responsibility to supply `bit_security_level / 8` -many random seed bytes.
template<size_t bit_security_level>
  requires((bit_security_level == 128) || (bit_security_level == 192) || (bit_security_level == 256))
struct prng_t
{
private:
  // SHAKE256 Xof state; it is finalized by both constructors, afterwards it's only ever squeezed.
  shake256::shake256_t state{};

public:
  // Default constructor which seeds PRNG with system randomness.
  inline prng_t()
  {
    constexpr size_t bits_per_byte = std::numeric_limits<uint8_t>::digits;

    std::array<uint8_t, bit_security_level / bits_per_byte> seed{};
    auto _seed = std::span(seed);

    // Read more @ https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device
    std::random_device rd{};

    size_t off = 0;
    while (off < _seed.size()) {
      const uint32_t v = rd();

      // Spread the 32 -bit word over the next seed bytes, least significant byte first. Plain shifts are used
      // instead of std::memcpy : this header never includes <cstring>, and the resulting byte order
      // no longer depends on host endianness.
      for (size_t i = 0; (i < sizeof(v)) && (off < _seed.size()); i++) {
        _seed[off] = static_cast<uint8_t>(v >> (i * bits_per_byte));
        off++;
      }
    }

    state.absorb(_seed);
    state.finalize();
  }

  // Explicit constructor which can be used for seeding PRNG with caller supplied `bit_security_level / 8` -bytes.
  inline explicit constexpr prng_t(std::span<const uint8_t, bit_security_level / std::numeric_limits<uint8_t>::digits> seed)
  {
    state.absorb(seed);
    state.finalize();
  }

  // Once PRNG is seeded i.e. PRNG object is constructed, you can request arbitrary many pseudo-random bytes from PRNG.
  // Every call squeezes `bytes.size()` -many bytes out of the SHAKE256 state, filling `bytes`.
  inline constexpr void read(std::span<uint8_t> bytes) { state.squeeze(bytes); }
};
}

View File

@@ -1,65 +1,56 @@
#pragma once
#include <cstddef>
#include <cstdint>
// Holds compile-time executable functions, ensuring that routines are invoked
// with proper arguments.
namespace kyber_params {
// Holds compile-time executable functions, ensuring that functions are invoked with proper arguments.
namespace ml_kem_params {
// Compile-time check to ensure that number of bits ( read `d` ) to consider
// during polynomial coefficient compression/ decompression is within tolerable
// bounds.
// Compile-time check to ensure that number of bits ( read `d` ) to consider during
// polynomial coefficient compression/ decompression is within tolerable bounds.
//
// See page 5 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See "Compression and Decompression" section on page 18 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
consteval bool
check_d(const size_t d)
{
  // Bit width of the modulus Q = 3329.
  //
  // $ python3
  // >>> import math
  // >>> Q = 3329
  // >>> math.log2(Q)            # == 11.700873155140263
  // >>> math.ceil(math.log2(Q)) # == 12
  constexpr size_t q_bit_width = 12ul;

  // `d` must be strictly smaller than the bit width of Q.
  return q_bit_width > d;
}
// Compile-time check to ensure that functions requiring η as parameter are
// invoked with proper argument.
// Compile-time check to ensure that functions requiring `η` as parameter are invoked with proper argument.
consteval bool
check_eta(const size_t eta)
{
  // Only η ∈ {2, 3} is accepted.
  switch (eta) {
    case 2:
    case 3:
      return true;
    default:
      return false;
  }
}
// Compile-time check to ensure that functions requiring k as parameter are
// invoked with proper argument.
// Compile-time check to ensure that functions requiring `k` as parameter are invoked with proper argument.
consteval bool
check_k(const size_t k)
{
  // Only k ∈ {2, 3, 4} is accepted.
  constexpr size_t min_k = 2;
  constexpr size_t max_k = 4;

  return (min_k <= k) && (k <= max_k);
}
// Compile-time check to ensure that polynomial to byte array encodoing ( and
// decoding ) routines are invoked with proper params.
// Compile-time check to ensure that polynomial to byte array encoding ( and decoding ) routines are invoked with proper params.
consteval bool
check_l(const size_t l)
{
  // Only the bit widths listed here are accepted.
  switch (l) {
    case 1:
    case 4:
    case 5:
    case 10:
    case 11:
    case 12:
      return true;
    default:
      return false;
  }
}
// Compile-time check to ensure that operand matrices are having compatible
// dimension for matrix multiplication
// Compile-time check to ensure that operand matrices are having compatible dimension for matrix multiplication.
consteval bool
check_matrix_dim(const size_t a_cols, const size_t b_rows)
{
  // Matrices can be multiplied only if the inner dimensions agree.
  return a_cols == b_rows;
}
// Compile-time check to ensure that Kyber PKE, KEM key generation routine is
// invoked with proper parameter set.
// Compile-time check to ensure that both K-PKE, ML-KEM key generation routine is invoked with proper parameter set.
//
// See algorithm 4, 7 and table 1 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See table 2 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
consteval bool
check_keygen_params(const size_t k, const size_t eta1)
{
@@ -70,11 +61,9 @@ check_keygen_params(const size_t k, const size_t eta1)
return flg0 || flg1 || flg2;
}
// Compile-time check to ensure that Kyber PKE's encryption routine is
// invoked with proper parameter set.
// Compile-time check to ensure that K-PKE encryption routine is invoked with proper parameter set.
//
// See algorithm 5 and table 1 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See table 2 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
consteval bool
check_encrypt_params(const size_t k, const size_t η1, const size_t η2, const size_t du, const size_t dv)
{
@@ -85,11 +74,9 @@ check_encrypt_params(const size_t k, const size_t η1, const size_t η2, const s
return flg0 || flg1 || flg2;
}
// Compile-time check to ensure that Kyber PKE's decryption routine is
// invoked with proper parameter set.
// Compile-time check to ensure that K-PKE decryption routine is invoked with proper parameter set.
//
// See algorithm 6 and table 1 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See table 2 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
consteval bool
check_decrypt_params(const size_t k, const size_t du, const size_t dv)
{
@@ -100,22 +87,18 @@ check_decrypt_params(const size_t k, const size_t du, const size_t dv)
return flg0 || flg1 || flg2;
}
// Compile-time check to ensure that Kyber KEM's encapsulation routine is
// invoked with proper parameter set.
// Compile-time check to ensure that ML-KEM encapsulation routine is invoked with proper parameter set.
//
// See algorithm 8 and table 1 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See table 2 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
consteval bool
check_encap_params(const size_t k, const size_t η1, const size_t η2, const size_t du, const size_t dv)
{
  // Encapsulation accepts exactly the parameter sets accepted by `check_encrypt_params`.
  const bool is_valid = check_encrypt_params(k, η1, η2, du, dv);
  return is_valid;
}
// Compile-time check to ensure that Kyber KEM's encapsulation routine is
// invoked with proper parameter set.
// Compile-time check to ensure that ML-KEM decapsulation routine is invoked with proper parameter set.
//
// See algorithm 9 and table 1 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See table 2 of ML-KEM specification https://doi.org/10.6028/NIST.FIPS.203.ipd.
consteval bool
check_decap_params(const size_t k, const size_t η1, const size_t η2, const size_t du, const size_t dv)
{

View File

@@ -0,0 +1,59 @@
#pragma once
#include "subtle.hpp"
#include <span>
namespace ml_kem_utils {
// Compares two byte arrays of equal length `n`. Every byte pair is visited, regardless of where the first
// mismatch is, and per-byte results of `subtle::ct_eq` are AND-ed together.
//
// Returns truth value (0xffffffff) if all `n` byte pairs are equal, otherwise false value (0x00000000).
template<size_t n>
static inline constexpr uint32_t
ct_memcmp(std::span<const uint8_t, n> bytes0, std::span<const uint8_t, n> bytes1)
{
  // Starts as truth value; a single mismatching byte pair clears it.
  uint32_t all_equal = 0xffffffffu;

  auto rhs = bytes1.begin();
  for (const uint8_t lhs : bytes0) {
    all_equal &= subtle::ct_eq<uint8_t, uint32_t>(lhs, *rhs);
    ++rhs;
  }

  return all_equal;
}
// Conditional copy of `n` bytes, driven by `cond`, which is expected to hold either 0xffffffff (truth value) or
// 0x00000000 (false value). Each byte of `sink` is picked with `subtle::ct_select`, from `source0` if `cond` holds
// truth value, else from `source1`. All `n` bytes are always written.
//
// In simple words, `sink = cond ? source0 : source1`
template<size_t n>
static inline constexpr void
ct_cond_memcpy(const uint32_t cond, std::span<uint8_t, n> sink, std::span<const uint8_t, n> source0, std::span<const uint8_t, n> source1)
{
  size_t idx = 0;
  for (auto& out : sink) {
    out = subtle::ct_select(cond, source0[idx], source1[idx]);
    ++idx;
  }
}
// Returns compile-time computable ML-KEM public key byte length i.e. `k` chunks of 12 * 32 -bytes each,
// followed by 32 more bytes.
static inline constexpr size_t
get_kem_public_key_len(const size_t k)
{
  constexpr size_t chunk_byte_len = 12 * 32;
  constexpr size_t trailer_byte_len = 32;

  return (k * chunk_byte_len) + trailer_byte_len;
}
// Returns compile-time computable ML-KEM secret key byte length i.e. `k` chunks of 12 * 32 -bytes each,
// followed by a whole public key and 2 x 32 more bytes.
static inline constexpr size_t
get_kem_secret_key_len(const size_t k)
{
  constexpr size_t chunk_byte_len = 12 * 32;
  constexpr size_t trailer_byte_len = 32 + 32;

  return (k * chunk_byte_len) + get_kem_public_key_len(k) + trailer_byte_len;
}
// Returns compile-time computable ML-KEM cipher text byte length i.e. `k` chunks of 32 * du -bytes each,
// followed by one chunk of 32 * dv -bytes.
static inline constexpr size_t
get_kem_cipher_text_len(size_t k, size_t du, size_t dv)
{
  const size_t u_byte_len = k * du * 32;
  const size_t v_byte_len = dv * 32;

  return u_byte_len + v_byte_len;
}
}

View File

@@ -0,0 +1,64 @@
#pragma once
#include "ml_kem/internals/ml_kem.hpp"
namespace ml_kem_1024 {
// ML-KEM Key Encapsulation Mechanism instantiated with ML-KEM-1024 parameters.
// This namespace only fixes the parameter set and byte lengths; all three routines below forward to the generic `ml_kem::` templates.
// See row 3 of table 2 of ML-KEM specification @ https://doi.org/10.6028/NIST.FIPS.203.ipd
static constexpr size_t k = 4;
static constexpr size_t η1 = 2;
static constexpr size_t η2 = 2;
static constexpr size_t du = 11;
static constexpr size_t dv = 5;
// 32 -bytes seed `d`, used in underlying K-PKE key generation
static constexpr size_t SEED_D_BYTE_LEN = 32;
// 32 -bytes seed `z`, used in ML-KEM key generation
static constexpr size_t SEED_Z_BYTE_LEN = 32;
// 1568 -bytes ML-KEM-1024 public key ( = k * 12 * 32 + 32, for k = 4 )
static constexpr size_t PKEY_BYTE_LEN = ml_kem_utils::get_kem_public_key_len(k);
// 3168 -bytes ML-KEM-1024 secret key ( = k * 12 * 32 + public key length + 32 + 32, for k = 4 )
static constexpr size_t SKEY_BYTE_LEN = ml_kem_utils::get_kem_secret_key_len(k);
// 32 -bytes seed `m`, used in ML-KEM encapsulation
static constexpr size_t SEED_M_BYTE_LEN = 32;
// 1568 -bytes ML-KEM-1024 cipher text ( = k * du * 32 + dv * 32, for k = 4, du = 11, dv = 5 )
static constexpr size_t CIPHER_TEXT_BYTE_LEN = ml_kem_utils::get_kem_cipher_text_len(k, du, dv);
// 32 -bytes ML-KEM-1024 shared secret
static constexpr size_t SHARED_SECRET_BYTE_LEN = 32;
// Computes a new ML-KEM-1024 keypair, given seed `d` and `z`.
// `pubkey` and `seckey` are output buffers; this is a thin wrapper over `ml_kem::keygen<k, η1>`.
inline constexpr void
keygen(std::span<const uint8_t, SEED_D_BYTE_LEN> d,
std::span<const uint8_t, SEED_Z_BYTE_LEN> z,
std::span<uint8_t, PKEY_BYTE_LEN> pubkey,
std::span<uint8_t, SKEY_BYTE_LEN> seckey)
{
ml_kem::keygen<k, η1>(d, z, pubkey, seckey);
}
// Given seed `m` and a ML-KEM-1024 public key, this routine computes a ML-KEM-1024 cipher text and a fixed size shared secret.
// If, input ML-KEM-1024 public key is malformed, encapsulation will fail, returning false.
// The boolean is whatever `ml_kem::encapsulate<k, η1, η2, du, dv>` returns; it is marked [[nodiscard]] so that it can't be silently ignored.
[[nodiscard("If public key is malformed, encapsulation fails")]] inline constexpr bool
encapsulate(std::span<const uint8_t, SEED_M_BYTE_LEN> m,
std::span<const uint8_t, PKEY_BYTE_LEN> pubkey,
std::span<uint8_t, CIPHER_TEXT_BYTE_LEN> cipher,
std::span<uint8_t, SHARED_SECRET_BYTE_LEN> shared_secret)
{
return ml_kem::encapsulate<k, η1, η2, du, dv>(m, pubkey, cipher, shared_secret);
}
// Given a ML-KEM-1024 secret key and a cipher text, this routine computes a fixed size shared secret.
// `shared_secret` is the output buffer; this is a thin wrapper over `ml_kem::decapsulate<k, η1, η2, du, dv>`, which reports no status.
inline constexpr void
decapsulate(std::span<const uint8_t, SKEY_BYTE_LEN> seckey, std::span<const uint8_t, CIPHER_TEXT_BYTE_LEN> cipher, std::span<uint8_t, SHARED_SECRET_BYTE_LEN> shared_secret)
{
ml_kem::decapsulate<k, η1, η2, du, dv>(seckey, cipher, shared_secret);
}
}

View File

@@ -0,0 +1,64 @@
#pragma once
#include "ml_kem/internals/ml_kem.hpp"
namespace ml_kem_512 {

// Parameter set of ML-KEM-512, with which the generic ML-KEM routines are instantiated below.
// See row 1 of table 2 of ML-KEM specification @ https://doi.org/10.6028/NIST.FIPS.203.ipd
static constexpr size_t k = 2;
static constexpr size_t η1 = 3;
static constexpr size_t η2 = 2;
static constexpr size_t du = 10;
static constexpr size_t dv = 4;

// Byte length of seed `d`, consumed by key generation ( for the underlying K-PKE ).
static constexpr size_t SEED_D_BYTE_LEN = 32;

// Byte length of seed `z`, consumed by ML-KEM key generation.
static constexpr size_t SEED_Z_BYTE_LEN = 32;

// Byte length of seed `m`, consumed by ML-KEM encapsulation.
static constexpr size_t SEED_M_BYTE_LEN = 32;

// Byte length of the shared secret, produced by both encapsulation and decapsulation.
static constexpr size_t SHARED_SECRET_BYTE_LEN = 32;

// Byte length of a ML-KEM-512 public key i.e. 800 -bytes.
static constexpr size_t PKEY_BYTE_LEN = ml_kem_utils::get_kem_public_key_len(k);

// Byte length of a ML-KEM-512 secret key i.e. 1632 -bytes.
static constexpr size_t SKEY_BYTE_LEN = ml_kem_utils::get_kem_secret_key_len(k);

// Byte length of a ML-KEM-512 cipher text i.e. 768 -bytes.
static constexpr size_t CIPHER_TEXT_BYTE_LEN = ml_kem_utils::get_kem_cipher_text_len(k, du, dv);

// ML-KEM-512 key generation.
//
// Takes seeds `d` and `z` as input, writes the keypair to `pubkey` and `seckey`.
// Forwards to the generic `ml_kem::keygen`, instantiated with (k, η1).
inline constexpr void
keygen(std::span<const uint8_t, SEED_D_BYTE_LEN> d,
       std::span<const uint8_t, SEED_Z_BYTE_LEN> z,
       std::span<uint8_t, PKEY_BYTE_LEN> pubkey,
       std::span<uint8_t, SKEY_BYTE_LEN> seckey)
{
  ml_kem::keygen<k, η1>(d, z, pubkey, seckey);
}

// ML-KEM-512 encapsulation.
//
// Takes seed `m` and a ML-KEM-512 public key as input, writes the cipher text to `cipher` and the
// fixed size shared secret to `shared_secret`. Forwards to the generic `ml_kem::encapsulate`.
//
// Returns false when encapsulation fails because the input public key is malformed - the return value
// must not be ignored.
[[nodiscard("If public key is malformed, encapsulation fails")]] inline constexpr bool
encapsulate(std::span<const uint8_t, SEED_M_BYTE_LEN> m,
            std::span<const uint8_t, PKEY_BYTE_LEN> pubkey,
            std::span<uint8_t, CIPHER_TEXT_BYTE_LEN> cipher,
            std::span<uint8_t, SHARED_SECRET_BYTE_LEN> shared_secret)
{
  return ml_kem::encapsulate<k, η1, η2, du, dv>(m, pubkey, cipher, shared_secret);
}

// ML-KEM-512 decapsulation.
//
// Takes a ML-KEM-512 secret key and a cipher text as input, writes the fixed size shared secret to
// `shared_secret`. Forwards to the generic `ml_kem::decapsulate`.
inline constexpr void
decapsulate(std::span<const uint8_t, SKEY_BYTE_LEN> seckey,
            std::span<const uint8_t, CIPHER_TEXT_BYTE_LEN> cipher,
            std::span<uint8_t, SHARED_SECRET_BYTE_LEN> shared_secret)
{
  ml_kem::decapsulate<k, η1, η2, du, dv>(seckey, cipher, shared_secret);
}

}

View File

@@ -0,0 +1,64 @@
#pragma once
#include "ml_kem/internals/ml_kem.hpp"
namespace ml_kem_768 {

// Parameter set of ML-KEM-768, with which the generic ML-KEM routines are instantiated below.
// See row 2 of table 2 of ML-KEM specification @ https://doi.org/10.6028/NIST.FIPS.203.ipd
static constexpr size_t k = 3;
static constexpr size_t η1 = 2;
static constexpr size_t η2 = 2;
static constexpr size_t du = 10;
static constexpr size_t dv = 4;

// Byte length of seed `d`, consumed by key generation ( for the underlying K-PKE ).
static constexpr size_t SEED_D_BYTE_LEN = 32;

// Byte length of seed `z`, consumed by ML-KEM key generation.
static constexpr size_t SEED_Z_BYTE_LEN = 32;

// Byte length of seed `m`, consumed by ML-KEM encapsulation.
static constexpr size_t SEED_M_BYTE_LEN = 32;

// Byte length of the shared secret, produced by both encapsulation and decapsulation.
static constexpr size_t SHARED_SECRET_BYTE_LEN = 32;

// Byte length of a ML-KEM-768 public key i.e. 1184 -bytes.
static constexpr size_t PKEY_BYTE_LEN = ml_kem_utils::get_kem_public_key_len(k);

// Byte length of a ML-KEM-768 secret key i.e. 2400 -bytes.
static constexpr size_t SKEY_BYTE_LEN = ml_kem_utils::get_kem_secret_key_len(k);

// Byte length of a ML-KEM-768 cipher text i.e. 1088 -bytes.
static constexpr size_t CIPHER_TEXT_BYTE_LEN = ml_kem_utils::get_kem_cipher_text_len(k, du, dv);

// ML-KEM-768 key generation.
//
// Takes seeds `d` and `z` as input, writes the keypair to `pubkey` and `seckey`.
// Forwards to the generic `ml_kem::keygen`, instantiated with (k, η1).
inline constexpr void
keygen(std::span<const uint8_t, SEED_D_BYTE_LEN> d,
       std::span<const uint8_t, SEED_Z_BYTE_LEN> z,
       std::span<uint8_t, PKEY_BYTE_LEN> pubkey,
       std::span<uint8_t, SKEY_BYTE_LEN> seckey)
{
  ml_kem::keygen<k, η1>(d, z, pubkey, seckey);
}

// ML-KEM-768 encapsulation.
//
// Takes seed `m` and a ML-KEM-768 public key as input, writes the cipher text to `cipher` and the
// fixed size shared secret to `shared_secret`. Forwards to the generic `ml_kem::encapsulate`.
//
// Returns false when encapsulation fails because the input public key is malformed - the return value
// must not be ignored.
[[nodiscard("If public key is malformed, encapsulation fails")]] inline constexpr bool
encapsulate(std::span<const uint8_t, SEED_M_BYTE_LEN> m,
            std::span<const uint8_t, PKEY_BYTE_LEN> pubkey,
            std::span<uint8_t, CIPHER_TEXT_BYTE_LEN> cipher,
            std::span<uint8_t, SHARED_SECRET_BYTE_LEN> shared_secret)
{
  return ml_kem::encapsulate<k, η1, η2, du, dv>(m, pubkey, cipher, shared_secret);
}

// ML-KEM-768 decapsulation.
//
// Takes a ML-KEM-768 secret key and a cipher text as input, writes the fixed size shared secret to
// `shared_secret`. Forwards to the generic `ml_kem::decapsulate`.
inline constexpr void
decapsulate(std::span<const uint8_t, SKEY_BYTE_LEN> seckey,
            std::span<const uint8_t, CIPHER_TEXT_BYTE_LEN> cipher,
            std::span<uint8_t, SHARED_SECRET_BYTE_LEN> shared_secret)
{
  ml_kem::decapsulate<k, η1, η2, du, dv>(seckey, cipher, shared_secret);
}

}

View File

@@ -1,238 +0,0 @@
#pragma once
#include "field.hpp"
#include <array>
#include <cstring>
// (inverse) Number Theoretic Transform for degree-255 polynomial, over Kyber
// Prime Field Zq | q = 3329
namespace ntt {
constexpr size_t LOG2N = 8;
constexpr size_t N = 1 << LOG2N;
// First primitive 256 -th root of unity modulo q | q = 3329
//
// Meaning, 17 ** 256 == 1 mod q
constexpr auto ζ = field::zq_t(17);
// Multiplicative inverse of N/ 2 over Z_q | q = 3329 and N = 256
//
// Meaning (N/ 2) * 3303 = 1 mod q
constexpr auto INV_N = field::zq_t(N / 2).inv();
// Reverses the bit order of the `mbw` least significant bits of `v`, returning the result as a
// `mbw` -bit wide number. Bits of `v` above position `mbw - 1` are ignored.
//
// Example: bit_rev<7>(0b0000011) = 0b1100000
//
// See
// https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L30-L38
// for source of inspiration
template<size_t mbw>
static inline constexpr size_t
bit_rev(const size_t v)
{
  size_t reversed = 0;
  size_t remaining = v;

  // Pop bits off the LSB side of the input, push them onto the LSB side of the output: the first bit
  // popped ends up at position `mbw - 1`, the last one at position 0.
  for (size_t cnt = 0; cnt < mbw; cnt++) {
    reversed = (reversed << 1) | (remaining & 1);
    remaining >>= 1;
  }

  return reversed;
}
// Builds, at compile-time, the table of ζ powers consumed by the forward NTT: entry i holds
// ζ raised to the (LOG2N - 1) -bit reversal of i.
consteval std::array<field::zq_t, N / 2>
compute_ntt_ζ()
{
  std::array<field::zq_t, N / 2> table;

  size_t idx = 0;
  for (auto& entry : table) {
    entry = ζ ^ bit_rev<LOG2N - 1>(idx);
    idx++;
  }

  return table;
}

// Precomputed powers of ζ, looked up when computing the NTT form of a degree-255 polynomial.
constexpr std::array<field::zq_t, N / 2> NTT_ζ_EXP = compute_ntt_ζ();
// Builds, at compile-time, the table consumed by the inverse NTT: entry i is the negation of the
// corresponding forward NTT constant i.e. -NTT_ζ_EXP[i].
consteval std::array<field::zq_t, N / 2>
compute_intt_ζ()
{
  std::array<field::zq_t, N / 2> table;

  size_t idx = 0;
  for (auto& entry : table) {
    entry = -NTT_ζ_EXP[idx];
    idx++;
  }

  return table;
}

// Precomputed negated powers of ζ, looked up when converting a degree-255 polynomial back to
// coefficient form using the inverse NTT.
constexpr std::array<field::zq_t, N / 2> INTT_ζ_EXP = compute_intt_ζ();
// Builds, at compile-time, the table of ζ powers consumed when multiplying two degree-255
// polynomials in NTT representation: entry i holds ζ ^ (2 * br<7>(i) + 1), where br<7> is the
// 7 -bit reversal.
consteval std::array<field::zq_t, N / 2>
compute_mul_ζ()
{
  std::array<field::zq_t, N / 2> table;

  size_t idx = 0;
  for (auto& entry : table) {
    // Low bit of the shifted value is always zero, hence OR-ing with 1 is the same as adding 1.
    const size_t exponent = (bit_rev<LOG2N - 1>(idx) << 1) | 1;

    entry = ζ ^ exponent;
    idx++;
  }

  return table;
}

// Precomputed powers of ζ, looked up when multiplying two degree-255 polynomials in NTT domain.
constexpr std::array<field::zq_t, N / 2> POLY_MUL_ζ_EXP = compute_mul_ζ();
// In-place number theoretic transform of a polynomial f with 256 coefficients over F_q | q = 3329.
//
// Runs LOG2N - 1 = 7 layers of cooley-tukey butterflies ( multiply by a ζ power first, then add and
// subtract ), with the butterfly distance halving from N/ 2 = 128 down to 2. Resulting polynomial f'
// has its coefficients placed in bit-reversed order.
//
// Note, this routine mutates input i.e. it's an in-place NTT implementation.
//
// Implementation inspired from
// https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L69-L144
static inline constexpr void
ntt(std::span<field::zq_t, N> poly)
{
  for (size_t half = N >> 1; half >= 2; half >>= 1) {
    const size_t block = half << 1;
    // Index of the constant used by the very first block of this layer; consecutive blocks use
    // consecutive table entries.
    const size_t first_ζ_idx = N / block;

    for (size_t base = 0; base < N; base += block) {
      // Precomputed ζ ^ bit_rev<LOG2N - 1>(index), see how NTT_ζ_EXP is generated.
      const field::zq_t twiddle = NTT_ζ_EXP[first_ζ_idx + (base / block)];

      for (size_t i = base; i < base + half; i++) {
        auto scaled = twiddle;
        scaled *= poly[i + half];

        poly[i + half] = poly[i] - scaled;
        poly[i] += scaled;
      }
    }
  }
}
// In-place inverse number theoretic transform of a polynomial f with 256 coefficients over
// F_q | q = 3329, whose coefficients are placed in bit-reversed order. Resulting polynomial f' has its
// coefficients placed in standard order.
//
// Runs LOG2N - 1 = 7 layers of butterflies, with the butterfly distance doubling from 2 up to
// N/ 2 = 128. Each butterfly adds and subtracts first, then multiplies the difference by a negated ζ
// power ( i.e. the gentleman-sande shape ). Finally every coefficient is scaled by INV_N.
//
// Note, this routine mutates input i.e. it's an in-place iNTT implementation.
//
// Implementation inspired from
// https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L146-L224
static inline constexpr void
intt(std::span<field::zq_t, N> poly)
{
  for (size_t half = 2; half < N; half <<= 1) {
    const size_t block = half << 1;
    // Index of the constant used by the very first block of this layer; consecutive blocks walk the
    // table downwards.
    const size_t first_ζ_idx = (N / half) - 1;

    for (size_t base = 0; base < N; base += block) {
      // Precomputed -(ζ ^ bit_rev<LOG2N - 1>(index)) i.e. -NTT_ζ_EXP[index].
      const field::zq_t neg_twiddle = INTT_ζ_EXP[first_ζ_idx - (base / block)];

      for (size_t i = base; i < base + half; i++) {
        const auto upper = poly[i];

        poly[i] += poly[i + half];
        poly[i + half] = upper - poly[i + half];
        poly[i + half] *= neg_twiddle;
      }
    }
  }

  // Scale by the multiplicative inverse of N/ 2.
  for (auto& coeff : poly) {
    coeff *= INV_N;
  }
}
// Multiplies two degree-1 polynomials
//
// f = f_2i + f_(2i + 1) * X
// g = g_2i + g_(2i + 1) * X
//
// producing degree-1 polynomial h s.t.
//
// h = f * g mod (X ^ 2 - ζ ^ (2 * br<7>(i) + 1)) | i ∈ [0, 128)
//
// where the ζ power is supplied by the caller as last argument. Written out
//
// h[0] = f[0] * g[0] + f[1] * g[1] * ζ
// h[1] = f[0] * g[1] + f[1] * g[0]
//
// Note, h[0] is written before inputs are read again for computing h[1].
//
// See page 6 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
static inline constexpr void
basemul(std::span<const field::zq_t, 2> f, // degree-1 polynomial
        std::span<const field::zq_t, 2> g, // degree-1 polynomial
        std::span<field::zq_t, 2> h,       // degree-1 polynomial
        const field::zq_t ζ                // zeta
)
{
  // Constant term
  field::zq_t const_term = f[1];
  const_term *= g[1];
  const_term *= ζ;

  field::zq_t f0g0 = f[0];
  f0g0 *= g[0];

  const_term += f0g0;
  h[0] = const_term;

  // Coefficient of X
  field::zq_t linear_term = g[1];
  linear_term *= f[0];

  field::zq_t f1g0 = g[0];
  f1g0 *= f[1];

  linear_term += f1g0;
  h[1] = linear_term;
}
// Multiplies two degree-255 polynomials, both in NTT form, by performing 128 basecase
// multiplications, one for each pair of consecutive coefficients, s.t.
//
// f = (f0ˆ + f1ˆX, f2ˆ + f3ˆX, ..., f254ˆ + f255ˆX)
// g = (g0ˆ + g1ˆX, g2ˆ + g3ˆX, ..., g254ˆ + g255ˆX)
//
// h = f ◦ g
//
// Pair number i uses precomputed constant POLY_MUL_ζ_EXP[i].
static inline constexpr void
polymul(std::span<const field::zq_t, N> f, // degree-255 polynomial
        std::span<const field::zq_t, N> g, // degree-255 polynomial
        std::span<field::zq_t, N> h        // degree-255 polynomial
)
{
  using in_pair_t = std::span<const field::zq_t, 2>;
  using out_pair_t = std::span<field::zq_t, 2>;

  for (size_t off = 0; off < N; off += 2) {
    const auto f_pair = in_pair_t(f.subspan(off, 2));
    const auto g_pair = in_pair_t(g.subspan(off, 2));
    const auto h_pair = out_pair_t(h.subspan(off, 2));

    basemul(f_pair, g_pair, h_pair, POLY_MUL_ζ_EXP[off >> 1]);
  }
}
}

View File

@@ -1,208 +0,0 @@
#pragma once
#include "field.hpp"
#include "params.hpp"
#include "poly_vec.hpp"
#include "sampling.hpp"
#include "sha3_512.hpp"
#include "utils.hpp"
#include <array>
#include <span>
// IND-CPA-secure Public Key Encryption Scheme
namespace pke {
// Kyber CPAPKE key generation algorithm, parameterized with `k` & `η1` ( read eta1 ). It consumes a
// 32 -bytes seed `d` and produces a byte serialized keypair s.t.
//
// public key: (k * 12 * 32 + 32) -bytes wide
// secret key: (k * 12 * 32) -bytes wide
//
// See algorithm 4 defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
//
// Note, the seed is taken as first parameter ( instead of being sampled inside ), which makes it easy
// to write test cases against known answer tests, obtained from Kyber reference implementation
// https://github.com/pq-crystals/kyber.git. It also helps in properly benchmarking underlying PKE's
// key generation implementation.
template<size_t k, size_t eta1>
static inline void
keygen(std::span<const uint8_t, 32> d, std::span<uint8_t, k * 12 * 32 + 32> pubkey, std::span<uint8_t, k * 12 * 32> seckey)
  requires(kyber_params::check_keygen_params(k, eta1))
{
  // step 2: SHA3-512 digest of seed `d`, first half of which is ρ, second half is σ
  std::array<uint8_t, 64> digest{};
  auto digest_span = std::span(digest);

  sha3_512::sha3_512_t hasher;
  hasher.absorb(d);
  hasher.finalize();
  hasher.digest(digest_span);

  const auto rho = digest_span.template first<32>();
  const auto sigma = digest_span.template last<32>();

  // step 4, 5, 6, 7, 8: public matrix A, sampled from ρ
  std::array<field::zq_t, k * k * ntt::N> A_prime{};
  kyber_utils::generate_matrix<k, false>(A_prime, rho);

  // step 3: nonce starts from 0, vector `s` uses nonces [0, k), vector `e` uses nonces [k, 2k)
  const uint8_t s_nonce = 0;
  const auto e_nonce = static_cast<uint8_t>(k);

  // step 9, 10, 11, 12: vector `s`, sampled from σ
  std::array<field::zq_t, k * ntt::N> s{};
  kyber_utils::generate_vector<k, eta1>(s, sigma, s_nonce);

  // step 13, 14, 15, 16: vector `e`, sampled from σ
  std::array<field::zq_t, k * ntt::N> e{};
  kyber_utils::generate_vector<k, eta1>(e, sigma, e_nonce);

  // step 17, 18: move both vectors to NTT domain
  kyber_utils::poly_vec_ntt<k>(s);
  kyber_utils::poly_vec_ntt<k>(e);

  // step 19: t' = A' * s + e
  std::array<field::zq_t, k * ntt::N> t_prime{};
  kyber_utils::matrix_multiply<k, k, k, 1>(A_prime, s, t_prime);
  kyber_utils::poly_vec_add_to<k>(e, t_prime);

  // step 20, 21, 22: public key is encoded t' followed by ρ, secret key is encoded s
  auto pubkey_t = pubkey.template first<k * 12 * 32>();
  auto pubkey_rho = pubkey.template last<32>();

  kyber_utils::poly_vec_encode<k, 12>(t_prime, pubkey_t);
  std::copy(rho.begin(), rho.end(), pubkey_rho.begin());
  kyber_utils::poly_vec_encode<k, 12>(s, seckey);
}
// INDCPA-secure Kyber encryption algorithm. It takes
//
// - a (k * 12 * 32 + 32) -bytes public key
// - a 32 -bytes message ( to be encrypted )
// - a 32 -bytes random coin ( from which all the randomness is deterministically sampled )
//
// and computes the compressed cipher text of (k * du * 32 + dv * 32) -bytes.
//
// See algorithm 5 defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv>
static inline void
encrypt(std::span<const uint8_t, k * 12 * 32 + 32> pubkey,
        std::span<const uint8_t, 32> msg,
        std::span<const uint8_t, 32> rcoin,
        std::span<uint8_t, k * du * 32 + dv * 32> enc)
  requires(kyber_params::check_encrypt_params(k, eta1, eta2, du, dv))
{
  // step 2, 3: public key is encoded t' followed by ρ
  auto pubkey_t = pubkey.template first<k * 12 * 32>();
  auto rho = pubkey.template last<32>();

  std::array<field::zq_t, k * ntt::N> t_prime{};
  kyber_utils::poly_vec_decode<k, 12>(pubkey_t, t_prime);

  // step 4, 5, 6, 7, 8: transposed public matrix, sampled from ρ
  std::array<field::zq_t, k * k * ntt::N> A_prime{};
  kyber_utils::generate_matrix<k, true>(A_prime, rho);

  // step 1: nonce starts from 0, `r` uses nonces [0, k), `e1` uses nonces [k, 2k), `e2` uses nonce 2k
  const uint8_t r_nonce = 0;
  const auto e1_nonce = static_cast<uint8_t>(k);
  const auto e2_nonce = static_cast<uint8_t>(2 * k);

  // step 9, 10, 11, 12
  std::array<field::zq_t, k * ntt::N> r{};
  kyber_utils::generate_vector<k, eta1>(r, rcoin, r_nonce);

  // step 13, 14, 15, 16
  std::array<field::zq_t, k * ntt::N> e1{};
  kyber_utils::generate_vector<k, eta2>(e1, rcoin, e1_nonce);

  // step 17
  std::array<field::zq_t, ntt::N> e2{};
  kyber_utils::generate_vector<1, eta2>(e2, rcoin, e2_nonce);

  // step 18
  kyber_utils::poly_vec_ntt<k>(r);

  // step 19: u = iNTT(A' * r) + e1
  std::array<field::zq_t, k * ntt::N> u{};
  kyber_utils::matrix_multiply<k, k, k, 1>(A_prime, r, u);
  kyber_utils::poly_vec_intt<k>(u);
  kyber_utils::poly_vec_add_to<k>(e1, u);

  // step 20: v = iNTT(t' * r) + e2 + decompressed message polynomial
  std::array<field::zq_t, ntt::N> v{};
  kyber_utils::matrix_multiply<1, k, k, 1>(t_prime, r, v);
  kyber_utils::poly_vec_intt<1>(v);
  kyber_utils::poly_vec_add_to<1>(e2, v);

  std::array<field::zq_t, ntt::N> m{};
  kyber_utils::decode<1>(msg, m);
  kyber_utils::poly_decompress<1>(m);
  kyber_utils::poly_vec_add_to<1>(m, v);

  // Cipher text is compressed + encoded u, followed by compressed + encoded v
  auto enc_u = enc.template first<k * du * 32>();
  auto enc_v = enc.template last<dv * 32>();

  // step 21
  kyber_utils::poly_vec_compress<k, du>(u);
  kyber_utils::poly_vec_encode<k, du>(u, enc_u);

  // step 22
  kyber_utils::poly_compress<dv>(v);
  kyber_utils::encode<dv>(v, enc_v);
}
// Kyber CPAPKE decryption algorithm. It takes
//
// - a (k * 12 * 32) -bytes secret key
// - a (k * du * 32 + dv * 32) -bytes encrypted ( cipher ) text
//
// and recovers the 32 -bytes plain text, which was encrypted using the public key associated with
// this secret key.
//
// See algorithm 6 defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
template<size_t k, size_t du, size_t dv>
static inline void
decrypt(std::span<const uint8_t, k * 12 * 32> seckey, std::span<const uint8_t, k * du * 32 + dv * 32> enc, std::span<uint8_t, 32> dec)
  requires(kyber_params::check_decrypt_params(k, du, dv))
{
  // Cipher text is compressed + encoded u, followed by compressed + encoded v
  auto enc_u = enc.template first<k * du * 32>();
  auto enc_v = enc.template last<dv * 32>();

  // step 1
  std::array<field::zq_t, k * ntt::N> u{};
  kyber_utils::poly_vec_decode<k, du>(enc_u, u);
  kyber_utils::poly_vec_decompress<k, du>(u);

  // step 2
  std::array<field::zq_t, ntt::N> v{};
  kyber_utils::decode<dv>(enc_v, v);
  kyber_utils::poly_decompress<dv>(v);

  // step 3
  std::array<field::zq_t, k * ntt::N> s_prime{};
  kyber_utils::poly_vec_decode<k, 12>(seckey, s_prime);

  // step 4: message polynomial is v - iNTT(s' * NTT(u)), which is then compressed to 1 -bit per
  // coefficient and encoded as 32 -bytes
  kyber_utils::poly_vec_ntt<k>(u);

  std::array<field::zq_t, ntt::N> su{};
  kyber_utils::matrix_multiply<1, k, k, 1>(s_prime, u, su);
  kyber_utils::poly_vec_intt<1>(su);
  kyber_utils::poly_vec_sub_from<1>(su, v);

  kyber_utils::poly_compress<1>(v);
  kyber_utils::encode<1>(v, dec);
}
}

View File

@@ -1,175 +0,0 @@
#pragma once
#include "compression.hpp"
#include "field.hpp"
#include "ntt.hpp"
#include "params.hpp"
#include "serialize.hpp"
#include <array>
#include <cstdint>
// IND-CPA-secure Public Key Encryption Scheme Utilities
namespace kyber_utils {
// Multiplies two matrices ( in NTT domain ) of compatible dimension, where each matrix element is a
// degree-255 polynomial over Z_q | q = 3329. Matrices are stored in row-major order, one polynomial
// ( ntt::N coefficients ) after another.
//
// Note, products are *accumulated* into `c` i.e. `c` is never cleared by this routine. For getting
// plain a * b, caller must pass a zeroed `c`.
template<size_t a_rows, size_t a_cols, size_t b_rows, size_t b_cols>
static inline constexpr void
matrix_multiply(std::span<const field::zq_t, a_rows * a_cols * ntt::N> a,
                std::span<const field::zq_t, b_rows * b_cols * ntt::N> b,
                std::span<field::zq_t, a_rows * b_cols * ntt::N> c)
  requires(kyber_params::check_matrix_dim(a_cols, b_rows))
{
  using poly_t = std::span<const field::zq_t, ntt::N>;

  // Scratch space, holding one polynomial product at a time
  std::array<field::zq_t, ntt::N> prod{};
  auto prod_span = std::span(prod);

  for (size_t row = 0; row < a_rows; row++) {
    for (size_t col = 0; col < b_cols; col++) {
      auto acc = c.subspan((row * b_cols + col) * ntt::N, ntt::N);

      for (size_t inner = 0; inner < a_cols; inner++) {
        const auto lhs = poly_t(a.subspan((row * a_cols + inner) * ntt::N, ntt::N));
        const auto rhs = poly_t(b.subspan((inner * b_cols + col) * ntt::N, ntt::N));

        ntt::polymul(lhs, rhs, prod_span);

        for (size_t cidx = 0; cidx < ntt::N; cidx++) {
          acc[cidx] += prod[cidx];
        }
      }
    }
  }
}
// Applies in-place NTT on each of the k degree-255 polynomials of a vector ( of dimension k x 1 ),
// whose coefficients are expected in non-NTT form.
template<size_t k>
static inline constexpr void
poly_vec_ntt(std::span<field::zq_t, k * ntt::N> vec)
  requires((k == 1) || kyber_params::check_k(k))
{
  using poly_t = std::span<field::zq_t, ntt::N>;

  for (size_t off = 0; off < vec.size(); off += ntt::N) {
    ntt::ntt(poly_t(vec.subspan(off, ntt::N)));
  }
}
// Applies in-place inverse NTT on each of the k degree-255 polynomials of a vector ( of dimension
// k x 1 ), whose coefficients are expected in NTT form i.e. placed in bit-reversed order.
template<size_t k>
static inline constexpr void
poly_vec_intt(std::span<field::zq_t, k * ntt::N> vec)
  requires((k == 1) || kyber_params::check_k(k))
{
  using poly_t = std::span<field::zq_t, ntt::N>;

  for (size_t off = 0; off < vec.size(); off += ntt::N) {
    ntt::intt(poly_t(vec.subspan(off, ntt::N)));
  }
}
// Adds polynomial vector `src` ( of dimension k x 1 ) to polynomial vector `dst` of same dimension,
// coefficient by coefficient i.e. dst += src.
template<size_t k>
static inline constexpr void
poly_vec_add_to(std::span<const field::zq_t, k * ntt::N> src, std::span<field::zq_t, k * ntt::N> dst)
  requires((k == 1) || kyber_params::check_k(k))
{
  size_t cidx = 0;
  while (cidx < dst.size()) {
    dst[cidx] += src[cidx];
    cidx++;
  }
}
// Subtracts polynomial vector `src` ( of dimension k x 1 ) from polynomial vector `dst` of same
// dimension, coefficient by coefficient i.e. dst -= src.
template<size_t k>
static inline constexpr void
poly_vec_sub_from(std::span<const field::zq_t, k * ntt::N> src, std::span<field::zq_t, k * ntt::N> dst)
  requires((k == 1) || kyber_params::check_k(k))
{
  size_t cidx = 0;
  while (cidx < dst.size()) {
    dst[cidx] -= src[cidx];
    cidx++;
  }
}
// Serializes a vector ( of dimension k x 1 ) of degree-255 polynomials: polynomial i is encoded
// into 32 x l -bytes, which are written at byte offset i x 32 x l of the (k x 32 x l) -bytes
// destination array.
template<size_t k, size_t l>
static inline void
poly_vec_encode(std::span<const field::zq_t, k * ntt::N> src, std::span<uint8_t, k * 32 * l> dst)
  requires(kyber_params::check_k(k))
{
  constexpr size_t poly_byte_len = 32 * l;

  using poly_t = std::span<const field::zq_t, ntt::N>;
  using serialized_t = std::span<uint8_t, poly_byte_len>;

  for (size_t pidx = 0; pidx < k; pidx++) {
    const auto poly = poly_t(src.subspan(pidx * ntt::N, ntt::N));
    const auto bytes = serialized_t(dst.subspan(pidx * l * 32, poly_byte_len));

    kyber_utils::encode<l>(poly, bytes);
  }
}
// Deserializes a byte array of length (k x 32 x l) -bytes into a column vector ( of dimension
// k x 1 ) of degree-255 polynomials: 32 x l -bytes chunk i is decoded into polynomial i.
template<size_t k, size_t l>
static inline void
poly_vec_decode(std::span<const uint8_t, k * 32 * l> src, std::span<field::zq_t, k * ntt::N> dst)
  requires(kyber_params::check_k(k))
{
  constexpr size_t poly_byte_len = 32 * l;

  using serialized_t = std::span<const uint8_t, poly_byte_len>;
  using poly_t = std::span<field::zq_t, ntt::N>;

  for (size_t pidx = 0; pidx < k; pidx++) {
    const auto bytes = serialized_t(src.subspan(pidx * l * 32, poly_byte_len));
    const auto poly = poly_t(dst.subspan(pidx * ntt::N, ntt::N));

    kyber_utils::decode<l>(bytes, poly);
  }
}
// Compresses, in-place, all of the k * 256 coefficients of a vector ( of dimension k x 1 ) of
// degree-255 polynomials, one polynomial at a time, using `poly_compress<d>`.
template<size_t k, size_t d>
static inline constexpr void
poly_vec_compress(std::span<field::zq_t, k * ntt::N> vec)
  requires(kyber_params::check_k(k))
{
  using poly_t = std::span<field::zq_t, ntt::N>;

  for (size_t off = 0; off < vec.size(); off += ntt::N) {
    kyber_utils::poly_compress<d>(poly_t(vec.subspan(off, ntt::N)));
  }
}
// Decompresses, in-place, all of the k * 256 coefficients of a vector ( of dimension k x 1 ) of
// degree-255 polynomials, one polynomial at a time, using `poly_decompress<d>`.
template<size_t k, size_t d>
static inline constexpr void
poly_vec_decompress(std::span<field::zq_t, k * ntt::N> vec)
  requires(kyber_params::check_k(k))
{
  using poly_t = std::span<field::zq_t, ntt::N>;

  for (size_t off = 0; off < vec.size(); off += ntt::N) {
    kyber_utils::poly_decompress<d>(poly_t(vec.subspan(off, ntt::N)));
  }
}
}

View File

@@ -1,64 +0,0 @@
#pragma once
#include "shake256.hpp"
#include <array>
#include <random>
#include <span>
// Pseudo Random Number Generator
namespace prng {
// Pseudo Random Number Generator s.t. N (>0) -many random bytes are read from
// SHAKE256 XOF state, arbitrary many times s.t. SHAKE256 state is obtained by
//
// - either hashing 32 -bytes sampled using std::random_device ( default )
// - or hashing M(>0) -bytes supplied as argument ( explicit )
//
// Note, std::random_device's behaviour is implementation defined, so this PRNG
// implementation doesn't guarantee that it'll generate cryptographically
// secure random bytes if you opt for using default constructor of this struct.
//
// I suggest you read
// https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device
// before using default constructor. When using explicit constructor, it's
// your responsibility to supply M -many random seed bytes.
struct prng_t
{
private:
  // SHAKE256 XOF state: seed is absorbed and finalized during construction, random bytes are squeezed afterwards.
  shake256::shake256_t state;

public:
  // Default constructor which seeds PRNG with 32 -bytes of system randomness.
  inline prng_t()
  {
    std::array<uint8_t, 32> seed{};
    auto _seed = std::span(seed);

    // Read more @
    // https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device
    std::random_device rd{};

    size_t off = 0;
    while (off < seed.size()) {
      const uint32_t v = rd();

      // Split the 32 -bit word into bytes using shifts, least significant byte first. This keeps the
      // byte order independent of host endianness and doesn't need std::memcpy ( <cstring> ).
      for (size_t i = 0; (i < sizeof(v)) && (off < seed.size()); i++) {
        seed[off] = static_cast<uint8_t>(v >> (i * 8));
        off++;
      }
    }

    state.absorb(_seed);
    state.finalize();
  }

  // Explicit constructor which seeds PRNG with caller supplied seed bytes.
  inline explicit prng_t(std::span<const uint8_t> seed)
  {
    state.absorb(seed);
    state.finalize();
  }

  // Once PRNG is seeded i.e. PRNG object is constructed, you can request
  // arbitrary many pseudo-random bytes from PRNG, they are squeezed out of the SHAKE256 state.
  inline void read(std::span<uint8_t> bytes) { state.squeeze(bytes); }
};
}

View File

@@ -1,169 +0,0 @@
#pragma once
#include "field.hpp"
#include "ntt.hpp"
#include "params.hpp"
#include "shake128.hpp"
#include "shake256.hpp"
#include <array>
#include <cstdint>
// IND-CPA-secure Public Key Encryption Scheme Utilities
namespace kyber_utils {
// Uniform sampling in R_q | q = 3329
//
// Given a byte stream ( here a SHAKE128 hasher, ready to be squeezed ), this routine
// *deterministically* samples a degree 255 polynomial in NTT representation, using rejection
// sampling: every 3 bytes squeezed provide two 12 -bit candidates, a candidate is accepted only if it
// is < q. If the byte stream is statistically close to a uniform random byte stream, produced
// polynomial coefficients are also statistically close to randomly sampled elements of R_q.
//
// See algorithm 1, defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
inline void
parse(shake128::shake128_t& hasher, std::span<field::zq_t, ntt::N> poly)
{
  constexpr size_t n = poly.size();

  // NOTE(review): bytes are consumed 3 at a time, which assumes the block length is a multiple of 3 -
  // confirm against shake128::RATE.
  std::array<uint8_t, shake128::RATE / 8> block{};
  size_t accepted = 0;

  while (accepted < n) {
    hasher.squeeze(block);

    for (size_t off = 0; off < block.size(); off += 3) {
      if (accepted >= n) {
        break;
      }

      const uint8_t b0 = block[off + 0];
      const uint8_t b1 = block[off + 1];
      const uint8_t b2 = block[off + 2];

      // First candidate: b0 and low nibble of b1. Second candidate: high nibble of b1 and b2.
      const uint16_t d1 = (static_cast<uint16_t>(b1 & 0x0f) << 8) | static_cast<uint16_t>(b0);
      const uint16_t d2 = (static_cast<uint16_t>(b2) << 4) | (static_cast<uint16_t>(b1 >> 4));

      if (d1 < field::Q) {
        poly[accepted] = field::zq_t(d1);
        accepted++;
      }

      if ((d2 < field::Q) && (accepted < n)) {
        poly[accepted] = field::zq_t(d2);
        accepted++;
      }
    }
  }
}
// Generates public matrix A ( of dimension k x k, consisting of degree-255 polynomials ) in NTT
// domain. Polynomial at (row, col) is sampled from a XOF ( read SHAKE128 ), which is seeded with
// 32 -bytes `rho` followed by two nonces ( each of 1 -byte ) s.t.
//
// - nonces are (col, row), when `transpose` is false
// - nonces are (row, col), when `transpose` is true
//
// See step (4-8) of algorithm 4/ 5, defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
template<size_t k, bool transpose>
static inline void
generate_matrix(std::span<field::zq_t, k * k * ntt::N> mat, std::span<const uint8_t, 32> rho)
  requires(kyber_params::check_k(k))
{
  using poly_t = std::span<field::zq_t, ntt::N>;

  std::array<uint8_t, rho.size() + 2> xof_in{};
  std::copy(rho.begin(), rho.end(), xof_in.begin());

  for (size_t row = 0; row < k; row++) {
    for (size_t col = 0; col < k; col++) {
      xof_in[32] = static_cast<uint8_t>(transpose ? row : col);
      xof_in[33] = static_cast<uint8_t>(transpose ? col : row);

      shake128::shake128_t hasher;
      hasher.absorb(xof_in);
      hasher.finalize();

      const size_t off = (row * k + col) * ntt::N;
      parse(hasher, poly_t(mat.subspan(off, ntt::N)));
    }
  }
}
// Centered Binomial Distribution
//
// Deterministically samples a degree 255 polynomial from 64 * eta -bytes output of a pseudorandom
// function ( PRF ). Each coefficient is the difference of two sums, each sum adding up `eta`
// consecutive bits of the PRF output.
//
// - For eta = 2, every byte yields two coefficients.
// - For eta = 3, every 3 bytes ( read as a 24 -bit little-endian word ) yield four coefficients.
//
// See algorithm 2, defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
template<size_t eta>
static inline void
cbd(std::span<const uint8_t, 64 * eta> prf, std::span<field::zq_t, ntt::N> poly)
  requires(kyber_params::check_eta(eta))
{
  if constexpr (eta == 2) {
    static_assert(eta == 2, "η must be 2 !");

    constexpr size_t byte_cnt = 64 * eta;
    constexpr uint8_t mask8 = 0b01010101;
    constexpr uint8_t mask2 = 0b11;

    for (size_t bidx = 0; bidx < byte_cnt; bidx++) {
      const uint8_t byte = prf[bidx];

      // Adding even positioned bits to odd positioned bits gives four 2 -bit wide sums, packed in a byte
      const uint8_t even_bits = (byte >> 0) & mask8;
      const uint8_t odd_bits = (byte >> 1) & mask8;
      const uint8_t sums = even_bits + odd_bits;

      const size_t cidx = bidx << 1;

      poly[cidx + 0] = field::zq_t((sums >> 0) & mask2) - field::zq_t((sums >> 2) & mask2);
      poly[cidx + 1] = field::zq_t((sums >> 4) & mask2) - field::zq_t((sums >> 6) & mask2);
    }
  } else {
    static_assert(eta == 3, "η must be 3 !");

    constexpr size_t group_cnt = 64;
    constexpr uint32_t mask24 = 0b001001001001001001001001u;
    constexpr uint32_t mask3 = 0b111u;

    for (size_t gidx = 0; gidx < group_cnt; gidx++) {
      const size_t boff = gidx * 3;
      const size_t cidx = gidx << 2;

      const uint32_t word = (static_cast<uint32_t>(prf[boff + 2]) << 16) | (static_cast<uint32_t>(prf[boff + 1]) << 8) | static_cast<uint32_t>(prf[boff + 0]);

      // Adding three bit-planes gives eight 3 -bit wide sums, packed in a 24 -bit word
      const uint32_t plane0 = (word >> 0) & mask24;
      const uint32_t plane1 = (word >> 1) & mask24;
      const uint32_t plane2 = (word >> 2) & mask24;
      const uint32_t sums = plane0 + plane1 + plane2;

      poly[cidx + 0] = field::zq_t((sums >> 0) & mask3) - field::zq_t((sums >> 3) & mask3);
      poly[cidx + 1] = field::zq_t((sums >> 6) & mask3) - field::zq_t((sums >> 9) & mask3);
      poly[cidx + 2] = field::zq_t((sums >> 12) & mask3) - field::zq_t((sums >> 15) & mask3);
      poly[cidx + 3] = field::zq_t((sums >> 18) & mask3) - field::zq_t((sums >> 21) & mask3);
    }
  }
}
// Samples a polynomial vector ( of dimension k x 1 ) from Bη: polynomial i is obtained by applying
// `cbd<eta>` on 64 * eta -bytes squeezed out of SHAKE256, which absorbed 32 -bytes `sigma`
// followed by the single byte `nonce + i`.
//
// See step (9-12) of algorithm 4, defined in Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
template<size_t k, size_t eta>
static inline void
generate_vector(std::span<field::zq_t, k * ntt::N> vec, std::span<const uint8_t, 32> sigma, const uint8_t nonce)
  requires((k == 1) || kyber_params::check_k(k))
{
  using poly_t = std::span<field::zq_t, ntt::N>;

  std::array<uint8_t, sigma.size() + 1> prf_in{};
  std::copy(sigma.begin(), sigma.end(), prf_in.begin());

  std::array<uint8_t, 64 * eta> prf_out{};

  for (size_t pidx = 0; pidx < k; pidx++) {
    prf_in[32] = nonce + static_cast<uint8_t>(pidx);

    shake256::shake256_t hasher;
    hasher.absorb(prf_in);
    hasher.finalize();
    hasher.squeeze(prf_out);

    kyber_utils::cbd<eta>(prf_out, poly_t(vec.subspan(pidx * ntt::N, ntt::N)));
  }
}
}

View File

@@ -1,107 +0,0 @@
#pragma once
#include "params.hpp"
#include "subtle.hpp"
#include <array>
#include <cassert>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <span>
#include <sstream>
// IND-CPA-secure Public Key Encryption Scheme Utilities
namespace kyber_utils {
// Compares two byte arrays of equal length `n`, visiting every byte pair regardless of
// where the first mismatch is. Per-byte results of `subtle::ct_eq` are AND-ed together,
// so the returned value is truth value (0xffffffff) in case of equality, otherwise it is
// false value (0x00000000).
template<size_t n>
static inline uint32_t
ct_memcmp(std::span<const uint8_t, n> bytes0, std::span<const uint8_t, n> bytes1)
{
  uint32_t all_equal = -1u;

  size_t idx = 0;
  while (idx < n) {
    const uint32_t byte_equal = subtle::ct_eq<uint8_t, uint32_t>(bytes0[idx], bytes1[idx]);
    all_equal &= byte_equal;
    idx++;
  }

  return all_equal;
}
// Conditionally copies `n` bytes into `sink`, byte by byte, using `subtle::ct_select`.
//
// - `cond` is expected to be either 0xffffffff (truth value) or 0x00000000 (false value).
// - If `cond` holds truth value, bytes are taken from `source0`, otherwise from `source1`.
//
// All three byte arrays are of equal length and every byte of both sources is read,
// whatever `cond` is.
template<size_t n>
static inline void
ct_cond_memcpy(const uint32_t cond, std::span<uint8_t, n> sink, std::span<const uint8_t, n> source0, std::span<const uint8_t, n> source1)
{
  for (size_t idx = 0; idx != n; ++idx) {
    const uint8_t when_true = source0[idx];
    const uint8_t when_false = source1[idx];

    sink[idx] = subtle::ct_select(cond, when_true, when_false);
  }
}
// Given a bytearray of length N, this function converts it to human readable
// lowercase hex string of length N << 1 | N >= 0
//
// Every byte is emitted as exactly two characters ( high nibble first ), by indexing
// into a digit table. Result is returned as a non-const value, so that callers can
// move from it.
inline std::string
to_hex(std::span<const uint8_t> bytes)
{
  static constexpr char hex_digits[] = "0123456789abcdef";

  std::string hex;
  hex.reserve(bytes.size() * 2);

  for (const uint8_t byte : bytes) {
    hex.push_back(hex_digits[byte >> 4]);
    hex.push_back(hex_digits[byte & 0x0f]);
  }

  return hex;
}
// Given a hex encoded string of length 2*L, this routine can be used for
// parsing it as a byte array of length L.
//
// Preconditions, checked with `assert` ( i.e. only when NDEBUG is not defined ):
//
// - Input holds exactly 2*L characters.
// - Every pair of characters is a valid base-16 encoding of a byte ( no sign, no `0x` prefix ).
template<size_t L>
inline std::array<uint8_t, L>
from_hex(std::string_view bytes)
{
  [[maybe_unused]] const size_t blen = bytes.length();

  assert(blen % 2 == 0);
  assert(blen / 2 == L);

  std::array<uint8_t, L> res{};

  for (size_t i = 0; i < L; i++) {
    const auto digits = bytes.substr(i * 2, 2);
    const char* const first = digits.data();
    // Bound parsing by the actual view, never read past the end of the input.
    const char* const last = first + digits.size();

    uint8_t byte = 0;
    const auto parsed = std::from_chars(first, last, byte, 16);

    // `from_chars` stops at the first character it can't consume; unless both
    // characters were consumed, the pair was not a valid hex encoded byte.
    assert(parsed.ptr == last);
    static_cast<void>(parsed);

    res[i] = byte;
  }

  return res;
}
// Compile-time computable byte length of IND-CCA-secure Kyber KEM public key,
// which is k * 12 * 32 bytes, followed by 32 more bytes.
static inline constexpr size_t
get_kem_public_key_len(const size_t k)
{
  constexpr size_t bytes_per_k = 12 * 32;
  constexpr size_t trailing_bytes = 32;

  return (k * bytes_per_k) + trailing_bytes;
}
// Compile-time computable byte length of IND-CCA-secure Kyber KEM secret key,
// which is sum of
//
// - k * 12 * 32 bytes
// - byte length of the KEM public key, for the same `k`
// - two more 32 -bytes chunks
static inline constexpr size_t
get_kem_secret_key_len(const size_t k)
{
  constexpr size_t fixed_tail_bytes = 32 + 32;

  return (k * 12 * 32) + get_kem_public_key_len(k) + fixed_tail_bytes;
}
// Compile-time computable byte length of IND-CCA-secure Kyber KEM cipher text,
// which is made of a k * du * 32 -bytes part, followed by a dv * 32 -bytes part.
static inline constexpr size_t
get_kem_cipher_len(size_t k, size_t du, size_t dv)
{
  const size_t first_part_len = k * du * 32;
  const size_t second_part_len = dv * 32;

  return first_part_len + second_part_len;
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
#include "kyber768_kem.hpp"
#include "ml_kem/ml_kem_1024.hpp"
#define DUDECT_IMPLEMENTATION
#define DUDECT_VISIBLITY_STATIC
@@ -12,41 +12,41 @@ do_one_computation(uint8_t* const data)
constexpr size_t doff0 = 0;
constexpr size_t doff1 = doff0 + SEED_LEN;
constexpr size_t doff2 = doff1 + 1;
constexpr size_t doff3 = doff2 + kyber768_kem::CIPHER_LEN;
constexpr size_t doff4 = doff3 + kyber768_kem::CIPHER_LEN;
constexpr size_t doff3 = doff2 + ml_kem_1024::CIPHER_TEXT_BYTE_LEN;
constexpr size_t doff4 = doff3 + ml_kem_1024::CIPHER_TEXT_BYTE_LEN;
constexpr size_t doff5 = doff4 + SEED_LEN;
constexpr size_t doff6 = doff5 + SEED_LEN;
std::array<field::zq_t, kyber768_kem::k * ntt::N> poly_vec{};
std::array<uint8_t, kyber768_kem::k * 32 * kyber768_kem::du> byte_arr{};
std::array<ml_kem_field::zq_t, ml_kem_1024::k * ml_kem_ntt::N> poly_vec{};
std::array<uint8_t, ml_kem_1024::k * 32 * ml_kem_1024::du> byte_arr{};
auto sigma = std::span<const uint8_t, SEED_LEN>(data + doff0, doff1 - doff0);
const auto nonce = data[doff1];
// Generate new secret polynomial vector
kyber_utils::generate_vector<kyber768_kem::k, kyber768_kem::η1>(poly_vec, sigma, nonce);
ml_kem_utils::generate_vector<ml_kem_1024::k, ml_kem_1024::η1>(poly_vec, sigma, nonce);
// Apply NTT on that secret vector
kyber_utils::poly_vec_ntt<kyber768_kem::k>(poly_vec);
ml_kem_utils::poly_vec_ntt<ml_kem_1024::k>(poly_vec);
// Apply iNTT on bit-reversed NTT form of secret polynomial vector
kyber_utils::poly_vec_intt<kyber768_kem::k>(poly_vec);
ml_kem_utils::poly_vec_intt<ml_kem_1024::k>(poly_vec);
// Compress coefficients of polynomial vector
kyber_utils::poly_vec_compress<kyber768_kem::k, kyber768_kem::du>(poly_vec);
ml_kem_utils::poly_vec_compress<ml_kem_1024::k, ml_kem_1024::du>(poly_vec);
// Serialize polynomial vector into byte array
kyber_utils::poly_vec_encode<kyber768_kem::k, kyber768_kem::du>(poly_vec, byte_arr);
ml_kem_utils::poly_vec_encode<ml_kem_1024::k, ml_kem_1024::du>(poly_vec, byte_arr);
// Recover coefficients of polynomial vector from byte array
kyber_utils::poly_vec_decode<kyber768_kem::k, kyber768_kem::du>(byte_arr, poly_vec);
ml_kem_utils::poly_vec_decode<ml_kem_1024::k, ml_kem_1024::du>(byte_arr, poly_vec);
// Decompress coefficients of polynomial vector
kyber_utils::poly_vec_decompress<kyber768_kem::k, kyber768_kem::du>(poly_vec);
ml_kem_utils::poly_vec_decompress<ml_kem_1024::k, ml_kem_1024::du>(poly_vec);
std::array<uint8_t, SEED_LEN> sink{};
auto _sink = std::span(sink);
using ctxt_t = std::span<const uint8_t, kyber768_kem::CIPHER_LEN>;
using ctxt_t = std::span<const uint8_t, ml_kem_1024::CIPHER_TEXT_BYTE_LEN>;
using seed_t = std::span<const uint8_t, SEED_LEN>;
// Ensure Fujisaki-Okamoto transform, used during decapsulation, is constant-time
const uint32_t cond = kyber_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3));
kyber_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5));
const uint32_t cond = ml_kem_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3));
ml_kem_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5));
// Just so that optimizer doesn't remove above function calls !
return static_cast<uint8_t>(poly_vec[0].raw() ^ poly_vec[poly_vec.size() - 1].raw()) ^ // result of generating vector of polynomials
@@ -69,14 +69,14 @@ prepare_inputs(dudect_config_t* const c, uint8_t* const input_data, uint8_t* con
}
dudect_state_t
test_kyber768_kem()
test_ml_kem_1024()
{
constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
1 + // single byte nonce
kyber768_kem::CIPHER_LEN + // bytes holding received cipher text
kyber768_kem::CIPHER_LEN + // bytes for locally computed cipher text
SEED_LEN + // bytes for first source buffer to copy from
SEED_LEN; // bytes for second source buffer to copy from
constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
1 + // single byte nonce
ml_kem_1024::CIPHER_TEXT_BYTE_LEN + // bytes holding received cipher text
ml_kem_1024::CIPHER_TEXT_BYTE_LEN + // bytes for locally computed cipher text
SEED_LEN + // bytes for first source buffer to copy from
SEED_LEN; // bytes for second source buffer to copy from
constexpr size_t number_measurements = 1e5;
dudect_config_t config = {
@@ -100,7 +100,7 @@ test_kyber768_kem()
int
main()
{
if (test_kyber768_kem() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) {
if (test_ml_kem_1024() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) {
return EXIT_FAILURE;
}

View File

@@ -1,4 +1,4 @@
#include "kyber512_kem.hpp"
#include "ml_kem/ml_kem_512.hpp"
#include <cstdio>
#define DUDECT_IMPLEMENTATION
@@ -13,41 +13,41 @@ do_one_computation(uint8_t* const data)
constexpr size_t doff0 = 0;
constexpr size_t doff1 = doff0 + SEED_LEN;
constexpr size_t doff2 = doff1 + 1;
constexpr size_t doff3 = doff2 + kyber512_kem::CIPHER_LEN;
constexpr size_t doff4 = doff3 + kyber512_kem::CIPHER_LEN;
constexpr size_t doff3 = doff2 + ml_kem_512::CIPHER_TEXT_BYTE_LEN;
constexpr size_t doff4 = doff3 + ml_kem_512::CIPHER_TEXT_BYTE_LEN;
constexpr size_t doff5 = doff4 + SEED_LEN;
constexpr size_t doff6 = doff5 + SEED_LEN;
std::array<field::zq_t, kyber512_kem::k * ntt::N> poly_vec{};
std::array<uint8_t, kyber512_kem::k * 32 * kyber512_kem::du> byte_arr{};
std::array<ml_kem_field::zq_t, ml_kem_512::k * ml_kem_ntt::N> poly_vec{};
std::array<uint8_t, ml_kem_512::k * 32 * ml_kem_512::du> byte_arr{};
auto sigma = std::span<const uint8_t, SEED_LEN>(data + doff0, doff1 - doff0);
const auto nonce = data[doff1];
// Generate new secret polynomial vector
kyber_utils::generate_vector<kyber512_kem::k, kyber512_kem::η1>(poly_vec, sigma, nonce);
ml_kem_utils::generate_vector<ml_kem_512::k, ml_kem_512::η1>(poly_vec, sigma, nonce);
// Apply NTT on that secret vector
kyber_utils::poly_vec_ntt<kyber512_kem::k>(poly_vec);
ml_kem_utils::poly_vec_ntt<ml_kem_512::k>(poly_vec);
// Apply iNTT on bit-reversed NTT form of secret polynomial vector
kyber_utils::poly_vec_intt<kyber512_kem::k>(poly_vec);
ml_kem_utils::poly_vec_intt<ml_kem_512::k>(poly_vec);
// Compress coefficients of polynomial vector
kyber_utils::poly_vec_compress<kyber512_kem::k, kyber512_kem::du>(poly_vec);
ml_kem_utils::poly_vec_compress<ml_kem_512::k, ml_kem_512::du>(poly_vec);
// Serialize polynomial vector into byte array
kyber_utils::poly_vec_encode<kyber512_kem::k, kyber512_kem::du>(poly_vec, byte_arr);
ml_kem_utils::poly_vec_encode<ml_kem_512::k, ml_kem_512::du>(poly_vec, byte_arr);
// Recover coefficients of polynomial vector from byte array
kyber_utils::poly_vec_decode<kyber512_kem::k, kyber512_kem::du>(byte_arr, poly_vec);
ml_kem_utils::poly_vec_decode<ml_kem_512::k, ml_kem_512::du>(byte_arr, poly_vec);
// Decompress coefficients of polynomial vector
kyber_utils::poly_vec_decompress<kyber512_kem::k, kyber512_kem::du>(poly_vec);
ml_kem_utils::poly_vec_decompress<ml_kem_512::k, ml_kem_512::du>(poly_vec);
std::array<uint8_t, SEED_LEN> sink{};
auto _sink = std::span(sink);
using ctxt_t = std::span<const uint8_t, kyber512_kem::CIPHER_LEN>;
using ctxt_t = std::span<const uint8_t, ml_kem_512::CIPHER_TEXT_BYTE_LEN>;
using seed_t = std::span<const uint8_t, SEED_LEN>;
// Ensure Fujisaki-Okamoto transform, used during decapsulation, is constant-time
const uint32_t cond = kyber_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3));
kyber_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5));
const uint32_t cond = ml_kem_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3));
ml_kem_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5));
// Just so that optimizer doesn't remove above function calls !
return static_cast<uint8_t>(poly_vec[0].raw() ^ poly_vec[poly_vec.size() - 1].raw()) ^ // result of generating vector of polynomials
@@ -70,14 +70,14 @@ prepare_inputs(dudect_config_t* const c, uint8_t* const input_data, uint8_t* con
}
dudect_state_t
test_kyber512_kem()
test_ml_kem_512()
{
constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
1 + // single byte nonce
kyber512_kem::CIPHER_LEN + // bytes holding received cipher text
kyber512_kem::CIPHER_LEN + // bytes for locally computed cipher text
SEED_LEN + // bytes for first source buffer to copy from
SEED_LEN; // bytes for second source buffer to copy from
constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
1 + // single byte nonce
ml_kem_512::CIPHER_TEXT_BYTE_LEN + // bytes holding received cipher text
ml_kem_512::CIPHER_TEXT_BYTE_LEN + // bytes for locally computed cipher text
SEED_LEN + // bytes for first source buffer to copy from
SEED_LEN; // bytes for second source buffer to copy from
constexpr size_t number_measurements = 1e5;
dudect_config_t config = {
@@ -101,7 +101,7 @@ test_kyber512_kem()
int
main()
{
if (test_kyber512_kem() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) {
if (test_ml_kem_512() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) {
return EXIT_FAILURE;
}

View File

@@ -1,4 +1,4 @@
#include "kyber1024_kem.hpp"
#include "ml_kem/ml_kem_768.hpp"
#define DUDECT_IMPLEMENTATION
#define DUDECT_VISIBLITY_STATIC
@@ -12,41 +12,41 @@ do_one_computation(uint8_t* const data)
constexpr size_t doff0 = 0;
constexpr size_t doff1 = doff0 + SEED_LEN;
constexpr size_t doff2 = doff1 + 1;
constexpr size_t doff3 = doff2 + kyber1024_kem::CIPHER_LEN;
constexpr size_t doff4 = doff3 + kyber1024_kem::CIPHER_LEN;
constexpr size_t doff3 = doff2 + ml_kem_768::CIPHER_TEXT_BYTE_LEN;
constexpr size_t doff4 = doff3 + ml_kem_768::CIPHER_TEXT_BYTE_LEN;
constexpr size_t doff5 = doff4 + SEED_LEN;
constexpr size_t doff6 = doff5 + SEED_LEN;
std::array<field::zq_t, kyber1024_kem::k * ntt::N> poly_vec{};
std::array<uint8_t, kyber1024_kem::k * 32 * kyber1024_kem::du> byte_arr{};
std::array<ml_kem_field::zq_t, ml_kem_768::k * ml_kem_ntt::N> poly_vec{};
std::array<uint8_t, ml_kem_768::k * 32 * ml_kem_768::du> byte_arr{};
auto sigma = std::span<const uint8_t, SEED_LEN>(data + doff0, doff1 - doff0);
const auto nonce = data[doff1];
// Generate new secret polynomial vector
kyber_utils::generate_vector<kyber1024_kem::k, kyber1024_kem::η1>(poly_vec, sigma, nonce);
ml_kem_utils::generate_vector<ml_kem_768::k, ml_kem_768::η1>(poly_vec, sigma, nonce);
// Apply NTT on that secret vector
kyber_utils::poly_vec_ntt<kyber1024_kem::k>(poly_vec);
ml_kem_utils::poly_vec_ntt<ml_kem_768::k>(poly_vec);
// Apply iNTT on bit-reversed NTT form of secret polynomial vector
kyber_utils::poly_vec_intt<kyber1024_kem::k>(poly_vec);
ml_kem_utils::poly_vec_intt<ml_kem_768::k>(poly_vec);
// Compress coefficients of polynomial vector
kyber_utils::poly_vec_compress<kyber1024_kem::k, kyber1024_kem::du>(poly_vec);
ml_kem_utils::poly_vec_compress<ml_kem_768::k, ml_kem_768::du>(poly_vec);
// Serialize polynomial vector into byte array
kyber_utils::poly_vec_encode<kyber1024_kem::k, kyber1024_kem::du>(poly_vec, byte_arr);
ml_kem_utils::poly_vec_encode<ml_kem_768::k, ml_kem_768::du>(poly_vec, byte_arr);
// Recover coefficients of polynomial vector from byte array
kyber_utils::poly_vec_decode<kyber1024_kem::k, kyber1024_kem::du>(byte_arr, poly_vec);
ml_kem_utils::poly_vec_decode<ml_kem_768::k, ml_kem_768::du>(byte_arr, poly_vec);
// Decompress coefficients of polynomial vector
kyber_utils::poly_vec_decompress<kyber1024_kem::k, kyber1024_kem::du>(poly_vec);
ml_kem_utils::poly_vec_decompress<ml_kem_768::k, ml_kem_768::du>(poly_vec);
std::array<uint8_t, SEED_LEN> sink{};
auto _sink = std::span(sink);
using ctxt_t = std::span<const uint8_t, kyber1024_kem::CIPHER_LEN>;
using ctxt_t = std::span<const uint8_t, ml_kem_768::CIPHER_TEXT_BYTE_LEN>;
using seed_t = std::span<const uint8_t, SEED_LEN>;
// Ensure Fujisaki-Okamoto transform, used during decapsulation, is constant-time
const uint32_t cond = kyber_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3));
kyber_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5));
const uint32_t cond = ml_kem_utils::ct_memcmp(ctxt_t(data + doff2, doff3 - doff2), ctxt_t(data + doff3, doff4 - doff3));
ml_kem_utils::ct_cond_memcpy(cond, _sink, seed_t(data + doff4, doff5 - doff4), seed_t(data + doff5, doff6 - doff5));
// Just so that optimizer doesn't remove above function calls !
return static_cast<uint8_t>(poly_vec[0].raw() ^ poly_vec[poly_vec.size() - 1].raw()) ^ // result of generating vector of polynomials
@@ -69,14 +69,14 @@ prepare_inputs(dudect_config_t* const c, uint8_t* const input_data, uint8_t* con
}
dudect_state_t
test_kyber1024_kem()
test_ml_kem_768()
{
constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
1 + // single byte nonce
kyber1024_kem::CIPHER_LEN + // bytes holding received cipher text
kyber1024_kem::CIPHER_LEN + // bytes for locally computed cipher text
SEED_LEN + // bytes for first source buffer to copy from
SEED_LEN; // bytes for second source buffer to copy from
constexpr size_t chunk_size = SEED_LEN + // bytes holding seed `sigma`
1 + // single byte nonce
ml_kem_768::CIPHER_TEXT_BYTE_LEN + // bytes holding received cipher text
ml_kem_768::CIPHER_TEXT_BYTE_LEN + // bytes for locally computed cipher text
SEED_LEN + // bytes for first source buffer to copy from
SEED_LEN; // bytes for second source buffer to copy from
constexpr size_t number_measurements = 1e5;
dudect_config_t config = {
@@ -100,7 +100,7 @@ test_kyber1024_kem()
int
main()
{
if (test_kyber1024_kem() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) {
if (test_ml_kem_768() != DUDECT_NO_LEAKAGE_EVIDENCE_YET) {
return EXIT_FAILURE;
}

View File

@@ -1,4 +1,4 @@
#include "compression.hpp"
#include "ml_kem/internals/poly/compression.hpp"
#include <gtest/gtest.h>
// Decompression error that can happen for some given `d` s.t.
@@ -7,13 +7,13 @@
//
// |(x' - x) mod q| <= round(q / 2 ^ (d + 1))
//
// See eq. 2 of Kyber specification
// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf
// See eq. 2 of Ml_kem specification
// https://doi.org/10.6028/NIST.FIPS.203.ipd
template<size_t d>
static inline constexpr size_t
compute_error()
{
constexpr double t0 = static_cast<double>(field::Q);
constexpr double t0 = static_cast<double>(ml_kem_field::Q);
constexpr double t1 = static_cast<double>(1ul << (d + 1));
const size_t t2 = static_cast<size_t>(std::round(t0 / t1));
@@ -36,23 +36,23 @@ test_zq_compression()
requires(itr_cnt > 0)
{
bool res = true;
prng::prng_t prng;
ml_kem_prng::prng_t<256> prng{};
for (size_t i = 0; i < itr_cnt; i++) {
const auto a = field::zq_t::random(prng);
const auto a = ml_kem_field::zq_t::random(prng);
const auto b = kyber_utils::compress<d>(a);
const auto c = kyber_utils::decompress<d>(b);
const auto b = ml_kem_utils::compress<d>(a);
const auto c = ml_kem_utils::decompress<d>(b);
const auto a_canon = a.raw();
const auto c_canon = c.raw();
const uint32_t br0[]{ static_cast<uint16_t>(field::Q - c_canon), c_canon };
const bool flg0 = c_canon <= (field::Q >> 1);
const uint32_t br0[]{ static_cast<uint16_t>(ml_kem_field::Q - c_canon), c_canon };
const bool flg0 = c_canon <= (ml_kem_field::Q >> 1);
const auto c_prime = static_cast<int32_t>(br0[flg0]);
const uint32_t br1[]{ static_cast<uint16_t>(field::Q - a_canon), a_canon };
const bool flg1 = a_canon <= (field::Q >> 1);
const uint32_t br1[]{ static_cast<uint16_t>(ml_kem_field::Q - a_canon), a_canon };
const bool flg1 = a_canon <= (ml_kem_field::Q >> 1);
const auto a_prime = static_cast<int32_t>(br1[flg1]);
const size_t err = static_cast<size_t>(std::abs(c_prime - a_prime));
@@ -64,7 +64,7 @@ test_zq_compression()
return res;
}
TEST(KyberKEM, CompressDecompressZq)
TEST(ML_KEM, CompressDecompressZq)
{
EXPECT_TRUE((test_zq_compression<11, 1ul << 20>()));
EXPECT_TRUE((test_zq_compression<10, 1ul << 20>()));

View File

@@ -1,17 +1,18 @@
#include "field.hpp"
#include "ml_kem/internals/math/field.hpp"
#include <gtest/gtest.h>
// Test functional correctness of Kyber prime field operations ( using
// Test functional correctness of Ml_kem prime field operations ( using
// Montgomery Arithmetic ), by running through multiple rounds of execution of
// field operations on randomly sampled field elements.
TEST(KyberKEM, ArithmeticOverZq)
TEST(ML_KEM, ArithmeticOverZq)
{
constexpr size_t itr_cnt = 1ul << 20;
prng::prng_t prng;
static constexpr size_t itr_cnt = 1ul << 20;
ml_kem_prng::prng_t<128> prng{};
for (size_t i = 0; i < itr_cnt; i++) {
const auto a = field::zq_t::random(prng);
const auto b = field::zq_t::random(prng);
const auto a = ml_kem_field::zq_t::random(prng);
const auto b = ml_kem_field::zq_t::random(prng);
// Addition, Subtraction and Negation
const auto c = a + b;
@@ -26,16 +27,16 @@ TEST(KyberKEM, ArithmeticOverZq)
const auto g = f / b;
const auto h = f / a;
if (b != field::zq_t()) {
if (b != ml_kem_field::zq_t()) {
EXPECT_EQ(g, a);
} else {
EXPECT_EQ(g, field::zq_t());
EXPECT_EQ(g, ml_kem_field::zq_t());
}
if (a != field::zq_t()) {
if (a != ml_kem_field::zq_t()) {
EXPECT_EQ(h, b);
} else {
EXPECT_EQ(h, field::zq_t());
EXPECT_EQ(h, ml_kem_field::zq_t());
}
}
}

32
tests/test_helper.hpp Normal file
View File

@@ -0,0 +1,32 @@
#pragma once
#include <array>
#include <cassert>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <string_view>
// Given a hex encoded string of length 2*L, this routine can be used for parsing it as a byte array of length L.
//
// Preconditions, checked with `assert` ( i.e. only when NDEBUG is not defined ):
//
// - Input holds exactly 2*L characters.
// - Every pair of characters is a valid base-16 encoding of a byte ( no sign, no `0x` prefix ).
template<size_t L>
static inline std::array<uint8_t, L>
from_hex(std::string_view bytes)
{
  [[maybe_unused]] const size_t blen = bytes.length();

  assert(blen % 2 == 0);
  assert(blen / 2 == L);

  std::array<uint8_t, L> res{};

  for (size_t i = 0; i < L; i++) {
    const auto digits = bytes.substr(i * 2, 2);
    const char* const first = digits.data();
    // Bound parsing by the actual view, never read past the end of the input.
    const char* const last = first + digits.size();

    uint8_t byte = 0;
    const auto parsed = std::from_chars(first, last, byte, 16);

    // `from_chars` stops at the first character it can't consume; unless both
    // characters were consumed, the pair was not a valid hex encoded byte.
    assert(parsed.ptr == last);
    static_cast<void>(parsed);

    res[i] = byte;
  }

  return res;
}

View File

@@ -1,72 +0,0 @@
#include "kem.hpp"
#include "utils.hpp"
#include <gtest/gtest.h>
// Given k, η1, η2, du, dv - Kyber parameters, this routine checks whether
//
// - A new key pair can be generated for key establishment over insecure channel
// - Key pair is for receiving party, its public key will be used by sender.
// - Sender can produce a cipher text and a key derivation function ( KDF )
// - Sender uses receiver's public key.
// - Cipher text is sent over insecure channel to receiver
// - Receiver can decrypt message ( using secret key ) and arrives at same KDF
// - Both parties use KDF ( SHAKE256 hasher object ) to generate arbitrary
// length shared secret key.
// - This shared secret key can now be used with any symmetric key primitive.
//
// works as expected.
template<size_t k, size_t eta1, size_t eta2, size_t du, size_t dv, size_t klen>
void
test_kyber_kem()
requires(klen > 0)
{
constexpr size_t slen = 32;
constexpr size_t pklen = kyber_utils::get_kem_public_key_len(k);
constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(k);
constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(k, du, dv);
std::vector<uint8_t> d(slen);
std::vector<uint8_t> z(slen);
std::vector<uint8_t> m(slen);
std::vector<uint8_t> pkey(pklen);
std::vector<uint8_t> skey(sklen);
std::vector<uint8_t> cipher(ctlen);
std::vector<uint8_t> sender_key(klen);
std::vector<uint8_t> receiver_key(klen);
auto _d = std::span<uint8_t, slen>(d);
auto _z = std::span<uint8_t, slen>(z);
auto _m = std::span<uint8_t, slen>(m);
auto _pkey = std::span<uint8_t, pklen>(pkey);
auto _skey = std::span<uint8_t, sklen>(skey);
auto _cipher = std::span<uint8_t, ctlen>(cipher);
prng::prng_t prng;
prng.read(d);
prng.read(z);
prng.read(m);
kem::keygen<k, eta1>(_d, _z, _pkey, _skey);
auto skdf = kem::encapsulate<k, eta1, eta2, du, dv>(_m, _pkey, _cipher);
auto rkdf = kem::decapsulate<k, eta1, eta2, du, dv>(_skey, _cipher);
skdf.squeeze(sender_key);
rkdf.squeeze(receiver_key);
EXPECT_EQ(sender_key, receiver_key);
}
// Keygen -> encapsulate -> decapsulate round trip, using (k, η1, η2, du, dv) = (2, 3, 2, 10, 4)
// and deriving 32 -bytes of shared secret.
TEST(KyberKEM, Kyber512KeygenEncapsDecaps)
{
  constexpr size_t k = 2;
  constexpr size_t eta1 = 3;
  constexpr size_t eta2 = 2;
  constexpr size_t du = 10;
  constexpr size_t dv = 4;
  constexpr size_t shared_secret_len = 32;

  test_kyber_kem<k, eta1, eta2, du, dv, shared_secret_len>();
}
// Keygen -> encapsulate -> decapsulate round trip, using (k, η1, η2, du, dv) = (3, 2, 2, 10, 4)
// and deriving 32 -bytes of shared secret.
TEST(KyberKEM, Kyber768KeygenEncapsDecaps)
{
  constexpr size_t k = 3;
  constexpr size_t eta1 = 2;
  constexpr size_t eta2 = 2;
  constexpr size_t du = 10;
  constexpr size_t dv = 4;
  constexpr size_t shared_secret_len = 32;

  test_kyber_kem<k, eta1, eta2, du, dv, shared_secret_len>();
}
// Keygen -> encapsulate -> decapsulate round trip, using (k, η1, η2, du, dv) = (4, 2, 2, 11, 5)
// and deriving 32 -bytes of shared secret.
TEST(KyberKEM, Kyber1024KeygenEncapsDecaps)
{
  constexpr size_t k = 4;
  constexpr size_t eta1 = 2;
  constexpr size_t eta2 = 2;
  constexpr size_t du = 11;
  constexpr size_t dv = 5;
  constexpr size_t shared_secret_len = 32;

  test_kyber_kem<k, eta1, eta2, du, dv, shared_secret_len>();
}

View File

@@ -1,272 +0,0 @@
#include "kyber1024_kem.hpp"
#include "kyber512_kem.hpp"
#include "kyber768_kem.hpp"
#include "utils.hpp"
#include <fstream>
#include <gtest/gtest.h>
// Test if
//
// - Is Kyber512 KEM implemented correctly ?
// - Is it conformant with the specification ?
//
// using Known Answer Tests, generated following
// https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710.
//
// KAT file is consumed record by record. A record is seven consecutive lines, holding d, z, pk, sk, m, ct and ss
// ( in that order ), followed by one more line, which is read and dropped. For each record, key pair, cipher text
// and shared secret are recomputed from (d, z, m) and compared against the values stored in the file.
TEST(KyberKEM, Kyber512KnownAnswerTests)
{
  using namespace std::literals;
  namespace utils = kyber_utils;
  namespace kyber512 = kyber512_kem;

  const std::string kat_file = "./kats/kyber512.kat";
  std::fstream file(kat_file);

  // Without this check, a missing KAT file shows up as an exception thrown while parsing empty lines.
  ASSERT_TRUE(file.is_open()) << "Failed to open KAT file " << kat_file;

  // Extracts the value portion of a line i.e. everything which begins two characters after the first '='.
  const auto value_of = [](std::string_view line) { return line.substr(line.find("="sv) + 2, line.size()); };

  while (true) {
    std::string d;
    if (std::getline(file, d).eof()) {
      break;
    }

    std::string z;
    std::string pk;
    std::string sk;
    std::string m;
    std::string ct;
    std::string ss;

    std::getline(file, z);
    std::getline(file, pk);
    std::getline(file, sk);
    std::getline(file, m);
    std::getline(file, ct);
    std::getline(file, ss);

    auto seed_d = utils::from_hex<32>(value_of(d));
    auto seed_z = utils::from_hex<32>(value_of(z));
    auto expected_pkey = utils::from_hex<kyber512::PKEY_LEN>(value_of(pk));
    auto expected_skey = utils::from_hex<kyber512::SKEY_LEN>(value_of(sk));
    auto msg = utils::from_hex<32>(value_of(m));
    auto expected_ctxt = utils::from_hex<kyber512::CIPHER_LEN>(value_of(ct));
    auto expected_shrd_sec = utils::from_hex<32>(value_of(ss));

    std::array<uint8_t, kyber512::PKEY_LEN> pkey{};
    std::array<uint8_t, kyber512::SKEY_LEN> skey{};
    std::array<uint8_t, kyber512::CIPHER_LEN> ctxt{};
    std::array<uint8_t, 32> shrd_sec0{};
    std::array<uint8_t, 32> shrd_sec1{};

    kyber512::keygen(seed_d, seed_z, pkey, skey);
    auto skdf = kyber512::encapsulate(msg, pkey, ctxt);
    auto rkdf = kyber512::decapsulate(skey, ctxt);

    skdf.squeeze(shrd_sec0);
    rkdf.squeeze(shrd_sec1);

    EXPECT_EQ(expected_pkey, pkey);
    EXPECT_EQ(expected_skey, skey);
    EXPECT_EQ(expected_ctxt, ctxt);
    EXPECT_EQ(expected_shrd_sec, shrd_sec0);
    EXPECT_EQ(shrd_sec0, shrd_sec1);

    // Drop the line which separates this record from the next one.
    std::string empty_line;
    std::getline(file, empty_line);
  }

  file.close();
}
// Test if Kyber768 KEM reproduces the Known Answer Tests, stored in `./kats/kyber768.kat`.
//
// KAT file is consumed record by record. A record is seven consecutive lines, holding d, z, pk, sk, m, ct and ss
// ( in that order ), followed by one more line, which is read and dropped. For each record, key pair, cipher text
// and shared secret are recomputed from (d, z, m) and compared against the values stored in the file.
TEST(KyberKEM, Kyber768KnownAnswerTests)
{
  using namespace std::literals;
  namespace utils = kyber_utils;
  namespace kyber768 = kyber768_kem;

  const std::string kat_file = "./kats/kyber768.kat";
  std::fstream file(kat_file);

  // Without this check, a missing KAT file shows up as an exception thrown while parsing empty lines.
  ASSERT_TRUE(file.is_open()) << "Failed to open KAT file " << kat_file;

  // Extracts the value portion of a line i.e. everything which begins two characters after the first '='.
  const auto value_of = [](std::string_view line) { return line.substr(line.find("="sv) + 2, line.size()); };

  while (true) {
    std::string d;
    if (std::getline(file, d).eof()) {
      break;
    }

    std::string z;
    std::string pk;
    std::string sk;
    std::string m;
    std::string ct;
    std::string ss;

    std::getline(file, z);
    std::getline(file, pk);
    std::getline(file, sk);
    std::getline(file, m);
    std::getline(file, ct);
    std::getline(file, ss);

    auto seed_d = utils::from_hex<32>(value_of(d));
    auto seed_z = utils::from_hex<32>(value_of(z));
    auto expected_pkey = utils::from_hex<kyber768::PKEY_LEN>(value_of(pk));
    auto expected_skey = utils::from_hex<kyber768::SKEY_LEN>(value_of(sk));
    auto msg = utils::from_hex<32>(value_of(m));
    auto expected_ctxt = utils::from_hex<kyber768::CIPHER_LEN>(value_of(ct));
    auto expected_shrd_sec = utils::from_hex<32>(value_of(ss));

    std::array<uint8_t, kyber768::PKEY_LEN> pkey{};
    std::array<uint8_t, kyber768::SKEY_LEN> skey{};
    std::array<uint8_t, kyber768::CIPHER_LEN> ctxt{};
    std::array<uint8_t, 32> shrd_sec0{};
    std::array<uint8_t, 32> shrd_sec1{};

    kyber768::keygen(seed_d, seed_z, pkey, skey);
    auto skdf = kyber768::encapsulate(msg, pkey, ctxt);
    auto rkdf = kyber768::decapsulate(skey, ctxt);

    skdf.squeeze(shrd_sec0);
    rkdf.squeeze(shrd_sec1);

    EXPECT_EQ(expected_pkey, pkey);
    EXPECT_EQ(expected_skey, skey);
    EXPECT_EQ(expected_ctxt, ctxt);
    EXPECT_EQ(expected_shrd_sec, shrd_sec0);
    EXPECT_EQ(shrd_sec0, shrd_sec1);

    // Drop the line which separates this record from the next one.
    std::string empty_line;
    std::getline(file, empty_line);
  }

  file.close();
}
// Test if
//
// - Is Kyber1024 KEM implemented correctly ?
// - Is it conformant with the specification ?
//
// using Known Answer Tests, generated following
// https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710.
//
// KAT file is consumed record by record. A record is seven consecutive lines, holding d, z, pk, sk, m, ct and ss
// ( in that order ), followed by one more line, which is read and dropped. For each record, key pair, cipher text
// and shared secret are recomputed from (d, z, m) and compared against the values stored in the file.
TEST(KyberKEM, Kyber1024KnownAnswerTests)
{
  using namespace std::literals;
  namespace utils = kyber_utils;
  namespace kyber1024 = kyber1024_kem;

  const std::string kat_file = "./kats/kyber1024.kat";
  std::fstream file(kat_file);

  // Without this check, a missing KAT file shows up as an exception thrown while parsing empty lines.
  ASSERT_TRUE(file.is_open()) << "Failed to open KAT file " << kat_file;

  // Extracts the value portion of a line i.e. everything which begins two characters after the first '='.
  const auto value_of = [](std::string_view line) { return line.substr(line.find("="sv) + 2, line.size()); };

  while (true) {
    std::string d;
    if (std::getline(file, d).eof()) {
      break;
    }

    std::string z;
    std::string pk;
    std::string sk;
    std::string m;
    std::string ct;
    std::string ss;

    std::getline(file, z);
    std::getline(file, pk);
    std::getline(file, sk);
    std::getline(file, m);
    std::getline(file, ct);
    std::getline(file, ss);

    auto seed_d = utils::from_hex<32>(value_of(d));
    auto seed_z = utils::from_hex<32>(value_of(z));
    auto expected_pkey = utils::from_hex<kyber1024::PKEY_LEN>(value_of(pk));
    auto expected_skey = utils::from_hex<kyber1024::SKEY_LEN>(value_of(sk));
    auto msg = utils::from_hex<32>(value_of(m));
    auto expected_ctxt = utils::from_hex<kyber1024::CIPHER_LEN>(value_of(ct));
    auto expected_shrd_sec = utils::from_hex<32>(value_of(ss));

    std::array<uint8_t, kyber1024::PKEY_LEN> pkey{};
    std::array<uint8_t, kyber1024::SKEY_LEN> skey{};
    std::array<uint8_t, kyber1024::CIPHER_LEN> ctxt{};
    std::array<uint8_t, 32> shrd_sec0{};
    std::array<uint8_t, 32> shrd_sec1{};

    kyber1024::keygen(seed_d, seed_z, pkey, skey);
    auto skdf = kyber1024::encapsulate(msg, pkey, ctxt);
    auto rkdf = kyber1024::decapsulate(skey, ctxt);

    skdf.squeeze(shrd_sec0);
    rkdf.squeeze(shrd_sec1);

    EXPECT_EQ(expected_pkey, pkey);
    EXPECT_EQ(expected_skey, skey);
    EXPECT_EQ(expected_ctxt, ctxt);
    EXPECT_EQ(expected_shrd_sec, shrd_sec0);
    EXPECT_EQ(shrd_sec0, shrd_sec1);

    // Drop the line which separates this record from the next one.
    std::string empty_line;
    std::getline(file, empty_line);
  }

  file.close();
}

View File

@@ -0,0 +1,38 @@
#include "ml_kem/ml_kem_1024.hpp"
#include <gtest/gtest.h>
// Round trip test for ML-KEM-1024.
//
// 1) Seeds `d`, `z` and `m` are filled using the PRNG.
// 2) A key pair is generated from seeds `d` and `z`.
// 3) Sender, holding the public key, encapsulates using seed `m`, producing a cipher text and a shared secret.
// 4) Receiver, holding the secret key, decapsulates the cipher text, producing its own shared secret.
//
// Encapsulation must report success and both parties must arrive at the same shared secret.
TEST(ML_KEM, ML_KEM_1024_KeygenEncapsDecaps)
{
  namespace mlkem = ml_kem_1024;

  // Inputs
  std::array<uint8_t, mlkem::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, mlkem::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, mlkem::SEED_M_BYTE_LEN> m{};

  // Outputs
  std::array<uint8_t, mlkem::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, mlkem::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, mlkem::CIPHER_TEXT_BYTE_LEN> ctxt{};
  std::array<uint8_t, mlkem::SHARED_SECRET_BYTE_LEN> secret_of_sender{};
  std::array<uint8_t, mlkem::SHARED_SECRET_BYTE_LEN> secret_of_receiver{};

  ml_kem_prng::prng_t<256> prng{};
  prng.read(d);
  prng.read(z);
  prng.read(m);

  mlkem::keygen(d, z, pkey, skey);
  const auto encaps_succeeded = mlkem::encapsulate(m, pkey, ctxt, secret_of_sender);
  mlkem::decapsulate(skey, ctxt, secret_of_receiver);

  EXPECT_TRUE(encaps_succeeded);
  EXPECT_EQ(secret_of_sender, secret_of_receiver);
}

View File

@@ -0,0 +1,90 @@
#include "ml_kem/ml_kem_1024.hpp"
#include "test_helper.hpp"
#include <fstream>
#include <gtest/gtest.h>
// Test if
//
// - Is ML-KEM-1024 implemented correctly ?
// - Is it conformant with the specification https://doi.org/10.6028/NIST.FIPS.203.ipd ?
//
// using Known Answer Tests, generated following
// https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710.
TEST(ML_KEM, ML_KEM_1024_KnownAnswerTests)
{
  using namespace std::literals;

  // Extracts the hex payload of a `name = value` KAT line, i.e. everything following "= ".
  const auto hex_of = [](std::string_view line) { return line.substr(line.find("="sv) + 2, line.size()); };

  const std::string kat_file = "./kats/ml_kem_1024.kat";

  // Read-only stream: std::fstream defaults to in|out and fails to open a read-only KAT file.
  std::ifstream file(kat_file);
  ASSERT_TRUE(file.is_open()) << "Failed to open KAT file: " << kat_file;

  // Number of complete records verified so far; guards against a vacuous pass on an empty file.
  std::size_t kat_count = 0;

  // Each record is seven `name = value` lines ( d, z, pk, sk, m, ct, ss ) followed by a separator line.
  std::string d;
  while (std::getline(file, d)) {
    std::string z;
    std::string pk;
    std::string sk;
    std::string m;
    std::string ct;
    std::string ss;

    const bool has_full_record = std::getline(file, z) && std::getline(file, pk) && std::getline(file, sk) &&
                                 std::getline(file, m) && std::getline(file, ct) && std::getline(file, ss);
    ASSERT_TRUE(has_full_record) << "Truncated KAT record #" << kat_count << " in " << kat_file;

    // Inputs driving the scheme.
    auto seed_d = from_hex<ml_kem_1024::SEED_D_BYTE_LEN>(hex_of(d));
    auto seed_z = from_hex<ml_kem_1024::SEED_Z_BYTE_LEN>(hex_of(z));
    auto seed_m = from_hex<ml_kem_1024::SEED_M_BYTE_LEN>(hex_of(m));

    // Expected outputs recorded in the KAT file.
    auto expected_pkey = from_hex<ml_kem_1024::PKEY_BYTE_LEN>(hex_of(pk));
    auto expected_skey = from_hex<ml_kem_1024::SKEY_BYTE_LEN>(hex_of(sk));
    auto expected_ctxt = from_hex<ml_kem_1024::CIPHER_TEXT_BYTE_LEN>(hex_of(ct));
    auto expected_shrd_sec = from_hex<ml_kem_1024::SHARED_SECRET_BYTE_LEN>(hex_of(ss));

    std::array<uint8_t, ml_kem_1024::PKEY_BYTE_LEN> pkey{};
    std::array<uint8_t, ml_kem_1024::SKEY_BYTE_LEN> skey{};
    std::array<uint8_t, ml_kem_1024::CIPHER_TEXT_BYTE_LEN> ctxt{};
    std::array<uint8_t, ml_kem_1024::SHARED_SECRET_BYTE_LEN> shrd_sec0{};
    std::array<uint8_t, ml_kem_1024::SHARED_SECRET_BYTE_LEN> shrd_sec1{};

    ml_kem_1024::keygen(seed_d, seed_z, pkey, skey);
    EXPECT_TRUE(ml_kem_1024::encapsulate(seed_m, pkey, ctxt, shrd_sec0));
    ml_kem_1024::decapsulate(skey, ctxt, shrd_sec1);

    EXPECT_EQ(expected_pkey, pkey);
    EXPECT_EQ(expected_skey, skey);
    EXPECT_EQ(expected_ctxt, ctxt);
    EXPECT_EQ(expected_shrd_sec, shrd_sec0);
    EXPECT_EQ(shrd_sec0, shrd_sec1);

    kat_count++;

    // Consume the separator line; it may be missing after the final record, so the result is not checked.
    std::string empty_line;
    std::getline(file, empty_line);
  }

  EXPECT_GT(kat_count, 0u) << "No KAT records found in " << kat_file;
}

38
tests/test_ml_kem_512.cpp Normal file
View File

@@ -0,0 +1,38 @@
#include "ml_kem/ml_kem_512.hpp"
#include <gtest/gtest.h>
// For ML-KEM-512
//
// - A new key pair can be generated for key establishment over insecure channel.
// - Secret key is for receiving party, its public key will be used by sender.
// - Sender can produce a cipher text and a shared secret.
// - Sender uses receiver's public key.
// - Cipher text is sent over insecure channel to receiver.
// - Receiver can decrypt message ( using secret key ) and produce same shared secret.
//
// works as expected.
TEST(ML_KEM, ML_KEM_512_KeygenEncapsDecaps)
{
  // Seeds: `d` and `z` feed key generation, `m` feeds encapsulation.
  std::array<uint8_t, ml_kem_512::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_512::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_512::SEED_M_BYTE_LEN> m{};

  // Receiver's key pair and the cipher text travelling from sender to receiver.
  std::array<uint8_t, ml_kem_512::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_512::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_512::CIPHER_TEXT_BYTE_LEN> ctxt{};

  // Shared secret as derived independently by each party.
  std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> sender_key{};
  std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> receiver_key{};

  // Fill the seeds in the order d, z, m.
  ml_kem_prng::prng_t<128> prng{};
  prng.read(d);
  prng.read(z);
  prng.read(m);

  // Receiver: keygen. Sender: encapsulate. Receiver: decapsulate.
  ml_kem_512::keygen(d, z, pkey, skey);
  const auto encaps_ok = ml_kem_512::encapsulate(m, pkey, ctxt, sender_key);
  ml_kem_512::decapsulate(skey, ctxt, receiver_key);

  // Encapsulation must report success and both parties must agree on the secret.
  EXPECT_TRUE(encaps_ok);
  EXPECT_EQ(sender_key, receiver_key);
}

View File

@@ -0,0 +1,90 @@
#include "ml_kem/ml_kem_512.hpp"
#include "test_helper.hpp"
#include <fstream>
#include <gtest/gtest.h>
// Test if
//
// - Is ML-KEM-512 implemented correctly ?
// - Is it conformant with the specification https://doi.org/10.6028/NIST.FIPS.203.ipd ?
//
// using Known Answer Tests, generated following
// https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710.
TEST(ML_KEM, ML_KEM_512_KnownAnswerTests)
{
  using namespace std::literals;

  // Extracts the hex payload of a `name = value` KAT line, i.e. everything following "= ".
  const auto hex_of = [](std::string_view line) { return line.substr(line.find("="sv) + 2, line.size()); };

  const std::string kat_file = "./kats/ml_kem_512.kat";

  // Read-only stream: std::fstream defaults to in|out and fails to open a read-only KAT file.
  std::ifstream file(kat_file);
  ASSERT_TRUE(file.is_open()) << "Failed to open KAT file: " << kat_file;

  // Number of complete records verified so far; guards against a vacuous pass on an empty file.
  std::size_t kat_count = 0;

  // Each record is seven `name = value` lines ( d, z, pk, sk, m, ct, ss ) followed by a separator line.
  std::string d;
  while (std::getline(file, d)) {
    std::string z;
    std::string pk;
    std::string sk;
    std::string m;
    std::string ct;
    std::string ss;

    const bool has_full_record = std::getline(file, z) && std::getline(file, pk) && std::getline(file, sk) &&
                                 std::getline(file, m) && std::getline(file, ct) && std::getline(file, ss);
    ASSERT_TRUE(has_full_record) << "Truncated KAT record #" << kat_count << " in " << kat_file;

    // Inputs driving the scheme.
    auto seed_d = from_hex<ml_kem_512::SEED_D_BYTE_LEN>(hex_of(d));
    auto seed_z = from_hex<ml_kem_512::SEED_Z_BYTE_LEN>(hex_of(z));
    auto seed_m = from_hex<ml_kem_512::SEED_M_BYTE_LEN>(hex_of(m));

    // Expected outputs recorded in the KAT file.
    auto expected_pkey = from_hex<ml_kem_512::PKEY_BYTE_LEN>(hex_of(pk));
    auto expected_skey = from_hex<ml_kem_512::SKEY_BYTE_LEN>(hex_of(sk));
    auto expected_ctxt = from_hex<ml_kem_512::CIPHER_TEXT_BYTE_LEN>(hex_of(ct));
    auto expected_shrd_sec = from_hex<ml_kem_512::SHARED_SECRET_BYTE_LEN>(hex_of(ss));

    std::array<uint8_t, ml_kem_512::PKEY_BYTE_LEN> pkey{};
    std::array<uint8_t, ml_kem_512::SKEY_BYTE_LEN> skey{};
    std::array<uint8_t, ml_kem_512::CIPHER_TEXT_BYTE_LEN> ctxt{};
    std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> shrd_sec0{};
    std::array<uint8_t, ml_kem_512::SHARED_SECRET_BYTE_LEN> shrd_sec1{};

    ml_kem_512::keygen(seed_d, seed_z, pkey, skey);
    EXPECT_TRUE(ml_kem_512::encapsulate(seed_m, pkey, ctxt, shrd_sec0));
    ml_kem_512::decapsulate(skey, ctxt, shrd_sec1);

    EXPECT_EQ(expected_pkey, pkey);
    EXPECT_EQ(expected_skey, skey);
    EXPECT_EQ(expected_ctxt, ctxt);
    EXPECT_EQ(expected_shrd_sec, shrd_sec0);
    EXPECT_EQ(shrd_sec0, shrd_sec1);

    kat_count++;

    // Consume the separator line; it may be missing after the final record, so the result is not checked.
    std::string empty_line;
    std::getline(file, empty_line);
  }

  EXPECT_GT(kat_count, 0u) << "No KAT records found in " << kat_file;
}

38
tests/test_ml_kem_768.cpp Normal file
View File

@@ -0,0 +1,38 @@
#include "ml_kem/ml_kem_768.hpp"
#include <gtest/gtest.h>
// For ML-KEM-768
//
// - A new key pair can be generated for key establishment over insecure channel.
// - Secret key is for receiving party, its public key will be used by sender.
// - Sender can produce a cipher text and a shared secret.
// - Sender uses receiver's public key.
// - Cipher text is sent over insecure channel to receiver.
// - Receiver can decrypt message ( using secret key ) and produce same shared secret.
//
// works as expected.
TEST(ML_KEM, ML_KEM_768_KeygenEncapsDecaps)
{
  // Seeds: `d` and `z` feed key generation, `m` feeds encapsulation.
  std::array<uint8_t, ml_kem_768::SEED_D_BYTE_LEN> d{};
  std::array<uint8_t, ml_kem_768::SEED_Z_BYTE_LEN> z{};
  std::array<uint8_t, ml_kem_768::SEED_M_BYTE_LEN> m{};

  // Receiver's key pair and the cipher text travelling from sender to receiver.
  std::array<uint8_t, ml_kem_768::PKEY_BYTE_LEN> pkey{};
  std::array<uint8_t, ml_kem_768::SKEY_BYTE_LEN> skey{};
  std::array<uint8_t, ml_kem_768::CIPHER_TEXT_BYTE_LEN> ctxt{};

  // Shared secret as derived independently by each party.
  std::array<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN> sender_key{};
  std::array<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN> receiver_key{};

  // Fill the seeds in the order d, z, m.
  ml_kem_prng::prng_t<192> prng{};
  prng.read(d);
  prng.read(z);
  prng.read(m);

  // Receiver: keygen. Sender: encapsulate. Receiver: decapsulate.
  ml_kem_768::keygen(d, z, pkey, skey);
  const auto encaps_ok = ml_kem_768::encapsulate(m, pkey, ctxt, sender_key);
  ml_kem_768::decapsulate(skey, ctxt, receiver_key);

  // Encapsulation must report success and both parties must agree on the secret.
  EXPECT_TRUE(encaps_ok);
  EXPECT_EQ(sender_key, receiver_key);
}

View File

@@ -0,0 +1,90 @@
#include "ml_kem/ml_kem_768.hpp"
#include "test_helper.hpp"
#include <fstream>
#include <gtest/gtest.h>
// Test if
//
// - Is ML-KEM-768 implemented correctly ?
// - Is it conformant with the specification https://doi.org/10.6028/NIST.FIPS.203.ipd ?
//
// using Known Answer Tests, generated following
// https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710.
TEST(ML_KEM, ML_KEM_768_KnownAnswerTests)
{
  using namespace std::literals;

  // Extracts the hex payload of a `name = value` KAT line, i.e. everything following "= ".
  const auto hex_of = [](std::string_view line) { return line.substr(line.find("="sv) + 2, line.size()); };

  const std::string kat_file = "./kats/ml_kem_768.kat";

  // Read-only stream: std::fstream defaults to in|out and fails to open a read-only KAT file.
  std::ifstream file(kat_file);
  ASSERT_TRUE(file.is_open()) << "Failed to open KAT file: " << kat_file;

  // Number of complete records verified so far; guards against a vacuous pass on an empty file.
  std::size_t kat_count = 0;

  // Each record is seven `name = value` lines ( d, z, pk, sk, m, ct, ss ) followed by a separator line.
  std::string d;
  while (std::getline(file, d)) {
    std::string z;
    std::string pk;
    std::string sk;
    std::string m;
    std::string ct;
    std::string ss;

    const bool has_full_record = std::getline(file, z) && std::getline(file, pk) && std::getline(file, sk) &&
                                 std::getline(file, m) && std::getline(file, ct) && std::getline(file, ss);
    ASSERT_TRUE(has_full_record) << "Truncated KAT record #" << kat_count << " in " << kat_file;

    // Inputs driving the scheme.
    auto seed_d = from_hex<ml_kem_768::SEED_D_BYTE_LEN>(hex_of(d));
    auto seed_z = from_hex<ml_kem_768::SEED_Z_BYTE_LEN>(hex_of(z));
    auto seed_m = from_hex<ml_kem_768::SEED_M_BYTE_LEN>(hex_of(m));

    // Expected outputs recorded in the KAT file.
    auto expected_pkey = from_hex<ml_kem_768::PKEY_BYTE_LEN>(hex_of(pk));
    auto expected_skey = from_hex<ml_kem_768::SKEY_BYTE_LEN>(hex_of(sk));
    auto expected_ctxt = from_hex<ml_kem_768::CIPHER_TEXT_BYTE_LEN>(hex_of(ct));
    auto expected_shrd_sec = from_hex<ml_kem_768::SHARED_SECRET_BYTE_LEN>(hex_of(ss));

    std::array<uint8_t, ml_kem_768::PKEY_BYTE_LEN> pkey{};
    std::array<uint8_t, ml_kem_768::SKEY_BYTE_LEN> skey{};
    std::array<uint8_t, ml_kem_768::CIPHER_TEXT_BYTE_LEN> ctxt{};
    std::array<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN> shrd_sec0{};
    std::array<uint8_t, ml_kem_768::SHARED_SECRET_BYTE_LEN> shrd_sec1{};

    ml_kem_768::keygen(seed_d, seed_z, pkey, skey);
    EXPECT_TRUE(ml_kem_768::encapsulate(seed_m, pkey, ctxt, shrd_sec0));
    ml_kem_768::decapsulate(skey, ctxt, shrd_sec1);

    EXPECT_EQ(expected_pkey, pkey);
    EXPECT_EQ(expected_skey, skey);
    EXPECT_EQ(expected_ctxt, ctxt);
    EXPECT_EQ(expected_shrd_sec, shrd_sec0);
    EXPECT_EQ(shrd_sec0, shrd_sec1);

    kat_count++;

    // Consume the separator line; it may be missing after the final record, so the result is not checked.
    std::string empty_line;
    std::getline(file, empty_line);
  }

  EXPECT_GT(kat_count, 0u) << "No KAT records found in " << kat_file;
}

View File

@@ -1,33 +0,0 @@
#include "field.hpp"
#include "ntt.hpp"
#include <gtest/gtest.h>
#include <vector>
// Ensure functional correctness of (inverse) NTT implementation for degree-255
// polynomial over F_q | q = 3329, using following rule
//
// f <- random polynomial
// f' <- ntt(f)
// f'' <- intt(f')
//
// assert(f == f'')
TEST(KyberKEM, NumberTheoreticTransform)
{
  // f: reference polynomial, filled coefficient by coefficient from the PRNG.
  std::vector<field::zq_t> original(ntt::N);

  prng::prng_t rng;
  for (auto& coeff : original) {
    coeff = field::zq_t::random(rng);
  }

  // Working copy of f which is transformed in place.
  std::vector<field::zq_t> round_trip(original);
  const auto view = std::span<field::zq_t, ntt::N>(round_trip);

  // f' <- ntt(f), then f'' <- intt(f').
  ntt::ntt(view);
  ntt::intt(view);

  // The round trip must reproduce the reference polynomial.
  EXPECT_EQ(original, round_trip);
}

View File

@@ -1,44 +1,39 @@
#include "field.hpp"
#include "serialize.hpp"
#include <cstdint>
#include "ml_kem/internals/poly/serialize.hpp"
#include <gtest/gtest.h>
#include <vector>
// Ensure that degree-255 polynomial serialization to byte array ( of length
// 32*l -bytes ) and deserialization of that byte array to degree-255 polynomial
// works as expected for parameterizable values of l | l ∈ [1, 12].
// Ensure that degree-255 polynomial serialization to byte array ( of length 32*l -bytes ) and deserialization of
// that byte array to degree-255 polynomial works as expected for parameterizable values of l | l ∈ [1, 12].
//
// l denotes significant bit width ( from LSB side ) for each coefficient of
// polynomial.
// `l` denotes significant bit width ( from LSB side ) for each coefficient of polynomial.
// Round-trip check for polynomial (de)serialization with `l` significant bits per coefficient:
// a random polynomial is encoded to bytes, decoded back and compared on the low `l` bits only.
//
// NOTE(review): this span shows the pre-rename ( `ntt::`, `field::`, `prng::`, `kyber_utils::` ) and the
// post-rename ( `ml_kem_ntt::`, `ml_kem_field::`, `ml_kem_prng::`, `ml_kem_utils::` ) variant of each changed
// line next to each other, without +/- markers; only one variant of each pair belongs to a given revision.
template<size_t l>
void
test_serialize_deserialize()
{
// Serialized length in bytes: N coefficients, `l` bits each.
constexpr size_t blen = (ntt::N * l) / 8;
constexpr size_t blen = (ml_kem_ntt::N * l) / 8;
// Selects the low `l` bits of a coefficient's raw value.
constexpr uint32_t mask = (1u << l) - 1u;
std::vector<field::zq_t> src(ntt::N);
std::vector<field::zq_t> dst(ntt::N);
std::vector<ml_kem_field::zq_t> src(ml_kem_ntt::N);
std::vector<ml_kem_field::zq_t> dst(ml_kem_ntt::N);
std::vector<uint8_t> bytes(blen);
prng::prng_t prng;
ml_kem_prng::prng_t<256> prng{};
// Fill the source polynomial with random field elements.
for (size_t i = 0; i < ntt::N; i++) {
src[i] = field::zq_t::random(prng);
for (size_t i = 0; i < ml_kem_ntt::N; i++) {
src[i] = ml_kem_field::zq_t::random(prng);
}
// Fixed-extent views over the vectors, passed to encode/decode below.
using poly_t = std::span<field::zq_t, ntt::N>;
using poly_t = std::span<ml_kem_field::zq_t, ml_kem_ntt::N>;
using serialized_t = std::span<uint8_t, blen>;
// src -> bytes -> dst
kyber_utils::encode<l>(poly_t(src), serialized_t(bytes));
kyber_utils::decode<l>(serialized_t(bytes), poly_t(dst));
ml_kem_utils::encode<l>(poly_t(src), serialized_t(bytes));
ml_kem_utils::decode<l>(serialized_t(bytes), poly_t(dst));
// Compare only the masked low `l` bits of every coefficient.
for (size_t i = 0; i < ntt::N; i++) {
for (size_t i = 0; i < ml_kem_ntt::N; i++) {
EXPECT_EQ((src[i].raw() & mask), (dst[i].raw() & mask));
}
}
TEST(KyberKEM, PolynomialSerialization)
TEST(ML_KEM, PolynomialSerialization)
{
test_serialize_deserialize<12>();
test_serialize_deserialize<11>();