efficient encoding & decoding of IdMaps

This commit is contained in:
Kevin Jahns
2025-04-19 15:15:34 +02:00
parent 2d87301af2
commit 99bcafe193
4 changed files with 189 additions and 11 deletions

View File

@@ -1,12 +1,13 @@
import {
_diffSet,
findIndexInIdRanges,
IdSet, ID // eslint-disable-line
DSDecoderV1, DSDecoderV2, DSEncoderV1, DSEncoderV2, IdSet, ID // eslint-disable-line
} from '../internals.js'
import * as array from 'lib0/array'
import * as map from 'lib0/map'
import * as encoding from 'lib0/encoding'
import * as decoding from 'lib0/decoding'
import * as buf from 'lib0/buffer'
import * as rabin from 'lib0/hash/rabin'
@@ -41,7 +42,6 @@ const _hashAttribution = attr => {
return buf.toBase64(rabin.fingerprint(rabin.StandardIrreducible128, encoding.toUint8Array(encoder)))
}
/**
* @template V
* @param {string} name
@@ -126,6 +126,7 @@ export class AttrRanges {
* @param {Array<Attribution<Attrs>>} attrs
*/
add (clock, length, attrs) {
if (length === 0) return
this.sorted = false
this._ids.push(new AttrRange(clock, length, attrs))
}
@@ -325,6 +326,7 @@ export class IdMap {
* @param {Array<Attribution<Attrs>>} attrs
*/
add (client, clock, len, attrs) {
if (len === 0) return
attrs = _ensureAttrs(this, attrs)
const ranges = this.clients.get(client)
if (ranges == null) {
@@ -335,6 +337,146 @@ export class IdMap {
}
}
/**
 * Writes an IdMap to `encoder` in a compact binary form.
 *
 * Information is de-duplicated while writing: every attribution object and every attribution
 * name receives a numeric id the first time it is encountered. The full content (name reference
 * and value, or the name string) is only written on that first occurrence; afterwards the id
 * alone is written.
 *
 * @template Attr
 * @param {DSEncoderV1 | DSEncoderV2} encoder
 * @param {IdMap<Attr>} idmap
 *
 * @private
 * @function
 */
export const writeIdMap = (encoder, idmap) => {
  const rest = encoder.restEncoder
  /**
   * Ids assigned to attribution objects, in order of first appearance.
   * @type {Map<Attribution<Attr>, number>}
   */
  const attributionIds = map.create()
  /**
   * Ids assigned to attribution names, in order of first appearance.
   * @type {Map<string, number>}
   */
  const nameIds = map.create()
  encoding.writeVarUint(rest, idmap.clients.size)
  // Clients are written in ascending client-id order so that the output is deterministic and
  // each client id can be stored as the difference to the previously written one.
  const clientsAscending = array.from(idmap.clients.entries()).sort((x, y) => x[0] - y[0])
  let previousClient = 0
  for (const [client, ranges] of clientsAscending) {
    const items = ranges.getIds()
    encoder.resetDsCurVal()
    encoding.writeVarUint(rest, client - previousClient)
    previousClient = client
    encoding.writeVarUint(rest, items.length)
    for (const { clock, len, attrs } of items) {
      encoder.writeDsClock(clock)
      encoder.writeDsLen(len)
      encoding.writeVarUint(rest, attrs.length)
      for (const attr of attrs) {
        const knownId = attributionIds.get(attr)
        if (knownId != null) {
          // already written before: the id alone is enough
          encoding.writeVarUint(rest, knownId)
          continue
        }
        // first occurrence: write the newly assigned id, followed by name reference and value
        const freshId = attributionIds.size
        attributionIds.set(attr, freshId)
        encoding.writeVarUint(rest, freshId)
        const knownNameId = nameIds.get(attr.name)
        if (knownNameId != null) {
          encoding.writeVarUint(rest, knownNameId)
        } else {
          // unseen name: write its newly assigned id and then the string itself
          const freshNameId = nameIds.size
          nameIds.set(attr.name, freshNameId)
          encoding.writeVarUint(rest, freshNameId)
          encoding.writeVarString(rest, attr.name)
        }
        encoding.writeAny(rest, /** @type {any} */ (attr.val))
      }
    }
  }
}
/**
 * Encodes an IdMap into a standalone binary blob: the map is written through a freshly created
 * DSEncoderV2 via `writeIdMap` and the encoder's content is returned.
 *
 * @param {IdMap<any>} idmap
 * @return {Uint8Array}
 */
export const encodeIdMap = idmap => {
  const v2Encoder = new DSEncoderV2()
  writeIdMap(v2Encoder, idmap)
  const bytes = v2Encoder.toUint8Array()
  return bytes
}
/**
 * Reads an IdMap from `decoder`, expecting the layout written by `writeIdMap`:
 * the number of clients, then per client the client id (as difference to the previous client id),
 * the number of ranges, and per range its clock, length and list of attribution ids.
 *
 * Attribution ids and attribution-name ids are assigned sequentially in order of first
 * appearance. An id equal to the number of entries seen so far introduces a new entry whose
 * content follows inline; a smaller id refers to an entry that was read earlier. A larger id
 * cannot be produced by a well-formed encoding and is rejected.
 *
 * @param {DSDecoderV1 | DSDecoderV2} decoder
 * @return {IdMap<any>}
 * @throws {Error} if the data references an attribution id or attribution-name id that has not
 *   been defined yet (malformed input)
 *
 * @private
 * @function
 */
export const readIdMap = decoder => {
  const idmap = new IdMap()
  const rest = decoder.restDecoder
  const numClients = decoding.readVarUint(rest)
  /**
   * Attributions in order of first appearance; the index is the encoded attribution id.
   * @type {Array<Attribution<any>>}
   */
  const visitedAttributions = []
  /**
   * Attribution names in order of first appearance; the index is the encoded name id.
   * @type {Array<string>}
   */
  const visitedAttrNames = []
  let lastClientId = 0
  for (let i = 0; i < numClients; i++) {
    decoder.resetDsCurVal()
    // client ids are stored as differences to the previously read client id
    const client = lastClientId + decoding.readVarUint(rest)
    lastClientId = client
    const numberOfRanges = decoding.readVarUint(rest)
    /**
     * @type {Array<AttrRange<any>>}
     */
    const attrRanges = []
    for (let r = 0; r < numberOfRanges; r++) {
      const rangeClock = decoder.readDsClock()
      const rangeLen = decoder.readDsLen()
      /**
       * @type {Array<Attribution<any>>}
       */
      const attrs = []
      const attrsLen = decoding.readVarUint(rest)
      for (let j = 0; j < attrsLen; j++) {
        const attrId = decoding.readVarUint(rest)
        if (attrId > visitedAttributions.length) {
          // ids are handed out sequentially, so skipping ahead means the data is corrupt
          throw new Error(`Malformed IdMap data: attribution id ${attrId} is referenced before it is defined`)
        }
        if (attrId === visitedAttributions.length) {
          // attrId not known yet: its name reference and value follow inline
          const attrNameId = decoding.readVarUint(rest)
          if (attrNameId > visitedAttrNames.length) {
            throw new Error(`Malformed IdMap data: attribution name id ${attrNameId} is referenced before it is defined`)
          }
          if (attrNameId === visitedAttrNames.length) {
            visitedAttrNames.push(decoding.readVarString(rest))
          }
          visitedAttributions.push(new Attribution(visitedAttrNames[attrNameId], decoding.readAny(rest)))
        }
        attrs.push(visitedAttributions[attrId])
      }
      attrRanges.push(new AttrRange(rangeClock, rangeLen, attrs))
    }
    idmap.clients.set(client, new AttrRanges(attrRanges))
  }
  // register every decoded attribution in the map's attribution set and hash index
  visitedAttributions.forEach(attr => {
    idmap.attrs.add(attr)
    idmap.attrsH.set(attr.hash(), attr)
  })
  return idmap
}
/**
 * Decodes a binary blob into an IdMap: the bytes are wrapped in a lib0 decoder, handed to a
 * DSDecoderV2 and then read with `readIdMap`.
 *
 * @param {Uint8Array} data
 * @return {IdMap<any>}
 */
export const decodeIdMap = data => {
  const rawDecoder = decoding.createDecoder(data)
  const dsDecoder = new DSDecoderV2(rawDecoder)
  return readIdMap(dsDecoder)
}
/**
* @template Attrs
* @param {IdMap<Attrs>} idmap
@@ -342,10 +484,12 @@ export class IdMap {
* @return {Array<Attribution<Attrs>>}
*/
const _ensureAttrs = (idmap, attrs) => attrs.map(attr =>
idmap.attrs.has(attr) ? attr : map.setIfUndefined(idmap.attrsH, _hashAttribution(attr), () => {
idmap.attrs.add(attr)
return attr
}))
idmap.attrs.has(attr)
? attr
: map.setIfUndefined(idmap.attrsH, _hashAttribution(attr), () => {
idmap.attrs.add(attr)
return attr
}))
/**
 * Factory for IdMap: returns a new instance created with the no-argument constructor.
 */
export const createIdMap = () => {
  const freshMap = new IdMap()
  return freshMap
}
@@ -358,4 +502,9 @@ export const createIdMap = () => new IdMap()
* @param {IdSet | IdMap<any>} exclude
* @return {ISet}
*/
export const diffIdMap = _diffSet
export const diffIdMap = (set, exclude) => {
  const result = _diffSet(set, exclude)
  // The result takes over the attribution registries of `set` by reference (they are not copied).
  return Object.assign(result, { attrs: set.attrs, attrsH: set.attrsH })
}

View File

@@ -303,6 +303,7 @@ export const diffIdSet = _diffSet
* @function
*/
export const addToIdSet = (idSet, client, clock, length) => {
if (length === 0) return
const idRanges = idSet.clients.get(client)
if (idRanges) {
idRanges.add(clock, length)

View File

@@ -1,6 +1,7 @@
import * as t from 'lib0/testing'
import * as am from '../src/utils/IdMap.js'
import { compareIdmaps, createIdMap, ID, createRandomIdSet, createRandomIdMap, createAttribution } from './testHelper.js'
import * as YY from '../src/internals.js'
/**
* @template T
@@ -115,6 +116,8 @@ export const testRepeatRandomDiffing = tc => {
const e1 = am.diffIdMap(ds1, ds2)
const e2 = am.diffIdMap(merged, ds2)
compareIdmaps(e1, e2)
const copy = YY.decodeIdMap(YY.encodeIdMap(e1))
compareIdmaps(e1, copy)
}
/**
@@ -133,4 +136,6 @@ export const testRepeatRandomDiffing2 = tc => {
const e2 = am.diffIdMap(am2, idsExclude)
const excludedMerged = am.mergeIdMaps([e1, e2])
compareIdmaps(mergedExcluded, excludedMerged)
const copy = YY.decodeIdMap(YY.encodeIdMap(mergedExcluded))
compareIdmaps(mergedExcluded, copy)
}

View File

@@ -8,7 +8,7 @@ import * as map from 'lib0/map'
import * as Y from '../src/index.js'
import * as math from 'lib0/math'
import {
createIdSet, createIdMap, addToIdSet
createIdSet, createIdMap, addToIdSet, encodeIdMap
} from '../src/internals.js'
export * from '../src/index.js'
@@ -313,7 +313,7 @@ export const init = (tc, { users = 5 } = {}, initTestObject) => {
* @param {Y.IdSet} idSet2
*/
export const compareIdSets = (idSet1, idSet2) => {
if (idSet1.clients.size !== idSet2.clients.size) return false
t.assert(idSet1.clients.size === idSet2.clients.size)
for (const [client, _items1] of idSet1.clients.entries()) {
const items1 = _items1.getIds()
const items2 = idSet2.clients.get(client)?.getIds()
@@ -349,13 +349,34 @@ const _idmapAttrsHas = (attrs, attr) => {
*/
export const _idmapAttrsEqual = (a, b) => {
  // lists of different length can never match
  if (a.length !== b.length) return false
  // same length: every entry of `a` must be found in `b`
  return a.every(attr => _idmapAttrsHas(b, attr))
}
/**
 * Consistency check for an IdMap, expressed as test assertions.
 *
 * For every attribution referenced by a range it asserts that the attribution is contained in
 * `idmap.attrs` and that `idmap.attrsH` maps the attribution's hash to that very object.
 * While doing so, the ranges are re-added to a freshly created IdMap (once per attribution a
 * range carries), and per client it is asserted that this copy ends up with the same number of
 * ranges as the original. Finally `attrsH` and `attrs` must have the same size.
 *
 * @template T
 * @param {Y.IdMap<T>} idmap
 */
export const validateIdMap = idmap => {
  const rebuilt = Y.createIdMap()
  for (const [client, ranges] of idmap.clients) {
    for (const range of ranges.getIds()) {
      for (const attr of range.attrs) {
        t.assert(idmap.attrs.has(attr))
        t.assert(idmap.attrsH.get(attr.hash()) === attr)
        // hand over a copy of the attrs array so the original range is not shared with the copy
        rebuilt.add(client, range.clock, range.len, range.attrs.slice())
      }
    }
    const rebuiltCount = rebuilt.clients.get(client)?.getIds().length
    t.assert(rebuiltCount === ranges.getIds().length)
  }
  t.assert(idmap.attrsH.size === idmap.attrs.size)
}
/**
* @template T
* @param {Y.IdMap<T>} am1
* @param {Y.IdMap<T>} am2
*/
export const compareIdmaps = (am1, am2) => {
if (am1.clients.size !== am2.clients.size) return false
t.assert(am1.clients.size === am2.clients.size)
for (const [client, _items1] of am1.clients.entries()) {
const items1 = _items1.getIds()
const items2 = am2.clients.get(client)?.getIds()
@@ -366,7 +387,8 @@ export const compareIdmaps = (am1, am2) => {
t.assert(di1.clock === di2.clock && di1.len === di2.len && _idmapAttrsEqual(di1.attrs, di2.attrs))
}
}
return true
validateIdMap(am1)
validateIdMap(am2)
}
/**
@@ -416,6 +438,7 @@ export const createRandomIdMap = (gen, clients, clockRange, attrChoices) => {
}
idMap.add(client, clockStart, len, attrs.map(v => Y.createAttribution('', v)))
}
t.info(`Created IdMap with ${numOfOps} ranges and ${attrChoices.length} different attributes. Encoded size: ${encodeIdMap(idMap).byteLength}`)
return idMap
}