add idmap encoding benchmark

2026-01-07 23:14:02 -05:00 · 2025-11-17 14:55:35 +01:00
parent b82a71ab41
commit b1b9552855
3 changed files with 44 additions and 3 deletions
--- a/src/utils/IdMap.js
+++ b/src/utils/IdMap.js
@@ -467,7 +467,7 @@ export const writeIdMap = (encoder, idmap) => {
   * @type {Map<string, number>}
   */
  const visitedAttrNames = map.create()
-  // Ensure that the delete set is written in a deterministic order (smaller clientids first)
+  // Ensure that the ids are written in a deterministic order (smaller clientids first)
  array.from(idmap.clients.entries())
    .sort((a, b) => a[0] - b[0])
    .forEach(([client, _idRanges]) => {
--- a/tests/IdMap.tests.js
+++ b/tests/IdMap.tests.js
@@ -4,6 +4,7 @@ import * as prng from 'lib0/prng'
 import * as math from 'lib0/math'
 import { compareIdmaps as compareIdMaps, createIdMap, ID, createRandomIdSet, createRandomIdMap, createAttributionItem } from './testHelper.js'
 import * as YY from '../src/internals.js'
+import * as time from 'lib0/time'

 /**
 * @template T
@@ -165,7 +166,7 @@ export const testRepeatRandomDeletes = tc => {
 /**
 * @param {t.TestCase} tc
 */
-export const testrepeatRandomIntersects = tc => {
+export const testRepeatRandomIntersects = tc => {
  const clients = 4
  const clockRange = 100
  const ids1 = createRandomIdMap(tc.prng, clients, clockRange, [1])
@@ -194,3 +195,43 @@ export const testrepeatRandomIntersects = tc => {
  const altDiffed1 = idmap.diffIdMap(ids1, intersected)
  compareIdMaps(diffed1, altDiffed1)
 }
+
+/**
+ * Benchmark: applies N random inserts/deletes to a text type, attributes every change to a user and
+ * a timestamp, and measures the edit loop and the encoding of the attributions idmap (logging its size).
+ *
+ * @todo debug why this approach needs 30 bytes per item
+ * @todo it should be possible to only use a single idmap and, in each attr entry, encode the diff
+ * to the previous entries (e.g. remove a,b, insert c,d)
+ * @param {t.TestCase} tc
+ */
+export const testUserAttributionEncodingBenchmark = tc => {
+  const attributions = createIdMap()
+  let currentTime = time.getUnixTime()
+  const ydoc = new YY.Doc()
+  // After every transaction, record its insert and delete sets together with the user and time.
+  ydoc.on('afterTransaction', tr => {
+    idmap.insertIntoIdMap(attributions, idmap.createIdMapFromIdSet(tr.insertSet, [createAttributionItem('insert', 'userX'), createAttributionItem('insertAt', currentTime)]))
+    idmap.insertIntoIdMap(attributions, idmap.createIdMapFromIdSet(tr.deleteSet, [createAttributionItem('delete', 'userX'), createAttributionItem('deleteAt', currentTime)]))
+    currentTime += 1 // give every transaction its own timestamp value
+  })
+  const ytext = ydoc.getText()
+  const N = 10000
+  t.measureTime(`time to attribute ${N / 1000}k changes`, () => {
+    for (let i = 0; i < N; i++) {
+      if (i % 2 > 0 && ytext.length > 0) {
+        const pos = prng.int31(tc.prng, 0, ytext.length)
+        const delLen = prng.int31(tc.prng, 0, ytext.length - pos)
+        ytext.delete(pos, delLen)
+      } else {
+        ytext.insert(prng.int31(tc.prng, 0, ytext.length), prng.word(tc.prng))
+      }
+    }
+  })
+  t.measureTime('time to encode attributions map', () => {
+    // @todo I can optimize size by encoding only the differences to the prev item.
+    const encAttributions = idmap.encodeIdMap(attributions)
+    t.info('encoded size: ' + encAttributions.byteLength)
+    t.info('size per change: ' + math.floor((encAttributions.byteLength / N) * 100) / 100 + ' bytes')
+  })
+}
--- a/tests/IdSet.tests.js
+++ b/tests/IdSet.tests.js
@@ -225,7 +225,7 @@ export const testRepeatRandomDiffing2 = tc => {
 /**
 * @param {t.TestCase} tc
 */
-export const testrepeatRandomIntersects = tc => {
+export const testRepeatRandomIntersects = tc => {
  const clients = 4
  const clockRange = 100
  const ids1 = createRandomIdSet(tc.prng, clients, clockRange)