Files
powdr/test_data/std/shift16_test.asm
Steve Wang 6c1c31a4da BabyBear shift machine (#1784)
All tests passed. :)

Operations:
- shl<0> A1, A2, B -> C1, C2
- shr<1> A1, A2, B -> C1, C2
- `A1` `A2` are 16 bit limbs of 32 bit `A` in little-endian order.
Likewise for `C1` and `C2`

Implementation:
- We adopted a similar implementation to our prior shift machine, which
decomposes `A` to 4 bytes and looks up each byte to a lookup table of
`[A_byte, B (shift amount), block row, operation id]`, so the size of
the lookup table is `[256, 32, 4, 2] = 65536`. Each row looks up to the
resulting byte after the bit shifting, and the results are added
together to obtain `C`.
- In our design, instead of looking up a single 32-bit `C` column, we
look up two 16-bit `C1` and `C2` columns. Overall, there are more
witness columns due to decomposing to 16-bit limbs in the main shift
machine and one more fixed column in the lookup table, but the same
number of lookups performed.

Future optimization:
- There's ample ground for "reshaping" the main machine to have more
columns but fewer rows, and do more lookups in each row so that we are
not just processing one byte in each row. In the most aggressive case,
we can even process everything in the same row.
- For example, processing two bytes in one row (instead of one byte)
should have half the number of rows but less than twice the number of
columns, and therefore fewer cells in total. This should be good for
provers whose proving time is linear in the number of cells.

---------

Co-authored-by: onurinanc <e191322@metu.edu.tr>
2024-09-11 13:28:23 +00:00

73 lines
2.4 KiB
Rust

use std::machines::shift16::ByteShift16;
use std::machines::shift16::Shift16;
// End-to-end test machine for the 16-bit-limb shift machine: Shift16
// backed by the ByteShift16 lookup table. 32-bit values are passed and
// returned as two 16-bit limbs in little-endian order (low, high).
machine Main with degree: 65536 {
reg pc[@pc];
// Assignment registers: X0_1/X0_2 carry the input limbs, X1 the shift
// amount, X2_1/X2_2 receive the output limbs.
reg X0_1[<=];
reg X0_2[<=];
reg X1[<=];
reg X2_1[<=];
reg X2_2[<=];
// Writable registers holding the low/high result limbs between calls.
reg ALow;
reg AHi;
ByteShift16 byte_shift_16;
Shift16 shift16(byte_shift_16);
// Shift left / shift right: inputs are (low limb, high limb, shift
// amount), outputs are the (low, high) limbs of the shifted value,
// delegated to the Shift16 submachine via a link.
instr shl X0_1, X0_2, X1 -> X2_1, X2_2 link ~> (X2_1, X2_2) = shift16.shl(X0_1, X0_2, X1);
instr shr X0_1, X0_2, X1 -> X2_1, X2_2 link ~> (X2_1, X2_2) = shift16.shr(X0_1, X0_2, X1);
// Constrains both limb pairs to be equal; witness generation / the
// proof fails if either limb differs.
instr assert_eq X0_1, X0_2, X2_1, X2_2 {
X0_1 = X2_1,
X0_2 = X2_2
}
function main {
// SHL: A = 0x9acf1357 shifted left by 0, 1, 4, 8, ..., 31 bits.
// Expected limbs are (A << n) mod 2^32 split into low/high halves.
ALow, AHi <== shl(0x1357, 0x9acf, 0);
assert_eq ALow, AHi, 0x1357, 0x9acf;
ALow, AHi <== shl(0x1357, 0x9acf, 1);
assert_eq ALow, AHi, 0x26ae, 0x359e;
ALow, AHi <== shl(0x1357, 0x9acf, 4);
assert_eq ALow, AHi, 0x3570, 0xacf1;
ALow, AHi <== shl(0x1357, 0x9acf, 8);
assert_eq ALow, AHi, 0x5700, 0xcf13;
ALow, AHi <== shl(0x1357, 0x9acf, 12);
assert_eq ALow, AHi, 0x7000, 0xf135;
// Shift by exactly one limb width: low limb becomes zero.
ALow, AHi <== shl(0x1357, 0x9acf, 16);
assert_eq ALow, AHi, 0, 0x1357;
ALow, AHi <== shl(0x1357, 0x9acf, 20);
assert_eq ALow, AHi, 0, 0x3570;
ALow, AHi <== shl(0x1357, 0x9acf, 24);
assert_eq ALow, AHi, 0, 0x5700;
ALow, AHi <== shl(0x1357, 0x9acf, 28);
assert_eq ALow, AHi, 0, 0x7000;
// Maximum shift amount: only the lowest input bit survives (as the MSB).
ALow, AHi <== shl(0x1357, 0x9acf, 31);
assert_eq ALow, AHi, 0, 0x8000;
// SHR: same input A = 0x9acf1357, logical right shifts (zero-filled).
ALow, AHi <== shr(0x1357, 0x9acf, 0);
assert_eq ALow, AHi, 0x1357, 0x9acf;
ALow, AHi <== shr(0x1357, 0x9acf, 1);
assert_eq ALow, AHi, 0x89ab, 0x4d67;
ALow, AHi <== shr(0x1357, 0x9acf, 4);
assert_eq ALow, AHi, 0xf135, 0x09ac;
ALow, AHi <== shr(0x1357, 0x9acf, 8);
assert_eq ALow, AHi, 0xcf13, 0x009a;
ALow, AHi <== shr(0x1357, 0x9acf, 12);
assert_eq ALow, AHi, 0xacf1, 0x0009;
// Shift by exactly one limb width: high limb becomes zero.
ALow, AHi <== shr(0x1357, 0x9acf, 16);
assert_eq ALow, AHi, 0x9acf, 0;
ALow, AHi <== shr(0x1357, 0x9acf, 20);
assert_eq ALow, AHi, 0x09ac, 0;
ALow, AHi <== shr(0x1357, 0x9acf, 24);
assert_eq ALow, AHi, 0x009a, 0;
ALow, AHi <== shr(0x1357, 0x9acf, 28);
assert_eq ALow, AHi, 0x0009, 0;
// Maximum shift amount: only the highest input bit survives (as bit 0).
ALow, AHi <== shr(0x1357, 0x9acf, 31);
assert_eq ALow, AHi, 0x1, 0;
return;
}
}