// Code generated by bavard DO NOT EDIT

package ringsis_64_8

import (
	"github.com/consensys/gnark-crypto/ecc/bls12-377/fr/fft"
	"github.com/consensys/linea-monorepo/prover/maths/common/smartvectors"
	"github.com/consensys/linea-monorepo/prover/maths/common/vector"
	"github.com/consensys/linea-monorepo/prover/maths/field"
	"github.com/consensys/linea-monorepo/prover/utils"
	"github.com/consensys/linea-monorepo/prover/utils/parallel"
	ppool "github.com/consensys/linea-monorepo/prover/utils/parallel/pool"
)
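
// TransversalHash computes the ring-SIS hash of every column of the matrix
// whose rows are given by `pols`, using the ring-SIS key `ag`. Each column
// hash consists of 64 field elements and the hashes are returned flattened in
// a single slice: the hash of column `col` occupies positions
// [col*64, (col+1)*64).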
func TransversalHash(
	// the Ag for ring-sis
	ag [][]field.Element,
	// A non-transposed list of columns, all of the same length
	pols []smartvectors.SmartVector,
	// The precomputed twiddle cosets for the forward FFT
	twiddleCosets []field.Element,
	// The domain for the final inverse-FFT
	domain *fft.Domain,
) []field.Element {

	var (
		// Each field element is encoded in 32 limbs but the degree is 64. So,
		// each polynomial multiplication "hashes" 2 field elements at once.
		// This is important to know for parallelization.
		resultSize = pols[0].Len() * 64

		// To optimize memory usage, we limit ourselves to hashing only 16
		// columns per iteration.
		numColumnPerJob int = 16

		// In theory, this should be a ceiling division. But in practice we
		// only process powers-of-two numbers of columns. If that is not the
		// case, the function will panic, but we can always change that if
		// needed. The rationale for the current design is simplicity.
		numJobs = utils.DivExact(pols[0].Len(), numColumnPerJob) // we make blocks of 16 columns

		// Main result of the hashing
		mainResults = make([]field.Element, resultSize)
		// When we encounter a constant row, it has the same additive
		// contribution to the result on every column. So we compute that
		// contribution only once and accumulate it with the other "constant
		// column contributions". This is only performed by the first thread.
		constResults = make([]field.Element, 64)
	)

	ppool.ExecutePoolChunky(numJobs, func(i int) {
		// We process the columns per segment of `numColumnPerJob`
		var (
			localResult = make([]field.Element, numColumnPerJob*64)
			limbs       = make([]field.Element, 64)

			// Each segment is processed in packets of `numFieldPerPoly=2` rows
			startFromCol = i * numColumnPerJob
			stopAtCol    = (i + 1) * numColumnPerJob
		)

		for row := 0; row < len(pols); row += 2 {

			var (
				chunksFull = make([][]field.Element, 2)
				mask       = 0
			)

			for j := 0; j < 2; j++ {
				if row+j >= len(pols) {
					continue
				}

				pReg, pIsReg := pols[row+j].(*smartvectors.Regular)
				if pIsReg {
					chunksFull[j] = (*pReg)[startFromCol:stopAtCol]
					mask |= (1 << j)
					continue
				}

				pPool, pIsPool := pols[row+j].(*smartvectors.Pooled)
				if pIsPool {
					chunksFull[j] = pPool.Regular[startFromCol:stopAtCol]
					mask |= (1 << j)
					continue
				}
			}
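
			// At this point, bit `j` of `mask` is set iff `pols[row+j]` is a
			// regular (or pooled) vector whose chunk was extracted above.
			// Unset bits correspond to constant rows (or to a missing row when
			// `len(pols)` is odd), which are handled separately below.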
			if mask > 0 {
				for col := 0; col < (stopAtCol - startFromCol); col++ {
					colChunk := [2]field.Element{}
					for j := 0; j < 2; j++ {
						if chunksFull[j] != nil {
							colChunk[j] = chunksFull[j][col]
						}
					}

					limbDecompose(limbs, colChunk[:])
					partialFFT[mask](limbs, twiddleCosets)
					mulModAcc(localResult[col*64:(col+1)*64], limbs, ag[row/2])
				}
			}
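
			// The contribution of constant rows is identical for every
			// column, so it is accumulated into `constResults` only once, by
			// the job with index 0.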
			if i == 0 {

				var (
					cMask      = ((1 << 2) - 1) ^ mask
					chunkConst = make([]field.Element, 2)
				)

				if cMask > 0 {
					for j := 0; j < 2; j++ {
						if row+j >= len(pols) {
							continue
						}

						if (cMask>>j)&1 == 1 {
							chunkConst[j] = pols[row+j].(*smartvectors.Constant).Get(0)
						}
					}

					limbDecompose(limbs, chunkConst)
					partialFFT[cMask](limbs, twiddleCosets)
					mulModAcc(constResults, limbs, ag[row/2])
				}
			}
		}

		// copy the segment into the main result at the end
		copy(mainResults[startFromCol*64:stopAtCol*64], localResult)
	})

	// Now, we need to reconcile the constant-row contribution stored in the
	// buffer with the result produced by each thread.
	parallel.Execute(pols[0].Len(), func(start, stop int) {
		for col := start; col < stop; col++ {
			// Accumulate the constant contribution
			vector.Add(mainResults[col*64:(col+1)*64], mainResults[col*64:(col+1)*64], constResults)
			// And run the inverse FFT
			domain.FFTInverse(mainResults[col*64:(col+1)*64], fft.DIT, fft.OnCoset(), fft.WithNbTasks(1))
		}
	})

	return mainResults
}
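
// _zeroes is an all-zero buffer used by zeroize to reset limb slices.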
var _zeroes []field.Element = make([]field.Element, 64)

// zeroize fills `buf` with zeroes.
func zeroize(buf []field.Element) {
	copy(buf, _zeroes)
}

// mulModAcc increments each entry `i` of `res` as `res[i] += a[i] * b[i]`. The
// input vectors are trusted to all have the same length.
func mulModAcc(res, a, b []field.Element) {
	var tmp field.Element
	for i := range res {
		tmp.Mul(&a[i], &b[i])
		res[i].Add(&res[i], &tmp)
	}
}
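
// limbDecompose decomposes the two field elements of `x` into 64 limbs of one
// byte each: limbs [0:32) hold the bytes of x[0] and limbs [32:64) those of
// x[1], starting from the least-significant byte in both cases. `result` is
// zeroized first and each limb value is written into the lowest word of the
// corresponding field element.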
func limbDecompose(result []field.Element, x []field.Element) {

	zeroize(result)
	var bytesBuffer = [32]byte{}

	bytesBuffer = x[0].Bytes()

	result[31][0] = uint64(bytesBuffer[0])
	result[30][0] = uint64(bytesBuffer[1])
	result[29][0] = uint64(bytesBuffer[2])
	result[28][0] = uint64(bytesBuffer[3])
	result[27][0] = uint64(bytesBuffer[4])
	result[26][0] = uint64(bytesBuffer[5])
	result[25][0] = uint64(bytesBuffer[6])
	result[24][0] = uint64(bytesBuffer[7])
	result[23][0] = uint64(bytesBuffer[8])
	result[22][0] = uint64(bytesBuffer[9])
	result[21][0] = uint64(bytesBuffer[10])
	result[20][0] = uint64(bytesBuffer[11])
	result[19][0] = uint64(bytesBuffer[12])
	result[18][0] = uint64(bytesBuffer[13])
	result[17][0] = uint64(bytesBuffer[14])
	result[16][0] = uint64(bytesBuffer[15])
	result[15][0] = uint64(bytesBuffer[16])
	result[14][0] = uint64(bytesBuffer[17])
	result[13][0] = uint64(bytesBuffer[18])
	result[12][0] = uint64(bytesBuffer[19])
	result[11][0] = uint64(bytesBuffer[20])
	result[10][0] = uint64(bytesBuffer[21])
	result[9][0] = uint64(bytesBuffer[22])
	result[8][0] = uint64(bytesBuffer[23])
	result[7][0] = uint64(bytesBuffer[24])
	result[6][0] = uint64(bytesBuffer[25])
	result[5][0] = uint64(bytesBuffer[26])
	result[4][0] = uint64(bytesBuffer[27])
	result[3][0] = uint64(bytesBuffer[28])
	result[2][0] = uint64(bytesBuffer[29])
	result[1][0] = uint64(bytesBuffer[30])
	result[0][0] = uint64(bytesBuffer[31])

	bytesBuffer = x[1].Bytes()

	result[63][0] = uint64(bytesBuffer[0])
	result[62][0] = uint64(bytesBuffer[1])
	result[61][0] = uint64(bytesBuffer[2])
	result[60][0] = uint64(bytesBuffer[3])
	result[59][0] = uint64(bytesBuffer[4])
	result[58][0] = uint64(bytesBuffer[5])
	result[57][0] = uint64(bytesBuffer[6])
	result[56][0] = uint64(bytesBuffer[7])
	result[55][0] = uint64(bytesBuffer[8])
	result[54][0] = uint64(bytesBuffer[9])
	result[53][0] = uint64(bytesBuffer[10])
	result[52][0] = uint64(bytesBuffer[11])
	result[51][0] = uint64(bytesBuffer[12])
	result[50][0] = uint64(bytesBuffer[13])
	result[49][0] = uint64(bytesBuffer[14])
	result[48][0] = uint64(bytesBuffer[15])
	result[47][0] = uint64(bytesBuffer[16])
	result[46][0] = uint64(bytesBuffer[17])
	result[45][0] = uint64(bytesBuffer[18])
	result[44][0] = uint64(bytesBuffer[19])
	result[43][0] = uint64(bytesBuffer[20])
	result[42][0] = uint64(bytesBuffer[21])
	result[41][0] = uint64(bytesBuffer[22])
	result[40][0] = uint64(bytesBuffer[23])
	result[39][0] = uint64(bytesBuffer[24])
	result[38][0] = uint64(bytesBuffer[25])
	result[37][0] = uint64(bytesBuffer[26])
	result[36][0] = uint64(bytesBuffer[27])
	result[35][0] = uint64(bytesBuffer[28])
	result[34][0] = uint64(bytesBuffer[29])
	result[33][0] = uint64(bytesBuffer[30])
	result[32][0] = uint64(bytesBuffer[31])
}