Fix bug in PFMERGE command (#13672)

The bug was introduced in #13558 . 

When merging dense hll structures, `hllDenseCompress` writes to wrong
location and the result will be zero. The unit tests didn't cover this
case.

This PR
+ fixes the bug
+ adds `PFDEBUG SIMD (ON|OFF)` for unit tests
+ adds a new TCL test to cover the cases

Synchronized from https://github.com/valkey-io/valkey/pull/1293

---------

Signed-off-by: Xuyang Wang <xuyangwang@link.cuhk.edu.cn>
Co-authored-by: debing.sun <debing.sun@redis.com>
This commit is contained in:
Nugine
2024-12-18 14:41:04 +08:00
committed by GitHub
parent f8942f93a6
commit 684077682e
2 changed files with 71 additions and 2 deletions

View File

@@ -190,6 +190,13 @@ struct hllhdr {
static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected";
#ifdef HAVE_AVX2
static int simd_enabled = 1;
#define HLL_USE_AVX2 (simd_enabled && __builtin_cpu_supports("avx2"))
#else
#define HLL_USE_AVX2 0
#endif
/* =========================== Low level bit macros ========================= */
/* Macros to access the dense representation.
@@ -1155,7 +1162,7 @@ void hllMergeDenseAVX2(uint8_t *reg_raw, const uint8_t *reg_dense) {
void hllMergeDense(uint8_t* reg_raw, const uint8_t* reg_dense) {
#ifdef HAVE_AVX2
if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
if (__builtin_cpu_supports("avx2")) {
if (HLL_USE_AVX2) {
hllMergeDenseAVX2(reg_raw, reg_dense);
return;
}
@@ -1315,7 +1322,7 @@ void hllDenseCompressAVX2(uint8_t *reg_dense, const uint8_t *reg_raw) {
void hllDenseCompress(uint8_t *reg_dense, const uint8_t *reg_raw) {
#ifdef HAVE_AVX2
if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
if (__builtin_cpu_supports("avx2")) {
if (HLL_USE_AVX2) {
hllDenseCompressAVX2(reg_dense, reg_raw);
return;
}
@@ -1587,6 +1594,7 @@ void pfmergeCommand(client *c) {
/* Write the resulting HLL to the destination HLL registers and
* invalidate the cached value. */
if (use_dense) {
hdr = o->ptr;
hllDenseCompress(hdr->registers, max);
} else {
for (j = 0; j < HLL_REGISTERS; j++) {
@@ -1724,6 +1732,7 @@ cleanup:
* PFDEBUG DECODE <key>
* PFDEBUG ENCODING <key>
* PFDEBUG TODENSE <key>
* PFDEBUG SIMD (ON|OFF)
*/
void pfdebugCommand(client *c) {
char *cmd = c->argv[1]->ptr;
@@ -1731,6 +1740,26 @@ void pfdebugCommand(client *c) {
robj *o;
int j;
if (!strcasecmp(cmd, "simd")) {
if (c->argc != 3) goto arityerr;
if (!strcasecmp(c->argv[2]->ptr, "on")) {
#ifdef HAVE_AVX2
simd_enabled = 1;
#endif
} else if (!strcasecmp(c->argv[2]->ptr, "off")) {
#ifdef HAVE_AVX2
simd_enabled = 0;
#endif
} else {
addReplyError(c, "Argument must be ON or OFF");
}
addReplyStatus(c, HLL_USE_AVX2 ? "enabled" : "disabled");
return;
}
o = lookupKeyWrite(c->db,c->argv[2]);
if (o == NULL) {
addReplyError(c,"The specified key does not exist");

View File

@@ -222,6 +222,46 @@ start_server {tags {"hll"}} {
assert_equal 3 [r pfcount destkey]
}
test {PFMERGE results with simd} {
r del hllscalar{t} hllsimd{t} hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 2000} {incr x} {
r pfadd hll1{t} [expr rand()]
}
for {set x 1} {$x < 4000} {incr x} {
r pfadd hll2{t} [expr rand()]
}
for {set x 1} {$x < 8000} {incr x} {
r pfadd hll3{t} [expr rand()]
}
assert {[r pfcount hll1{t}] > 0}
assert {[r pfcount hll2{t}] > 0}
assert {[r pfcount hll3{t}] > 0}
r pfdebug simd off
set scalar [r pfcount hll1{t} hll2{t} hll3{t}]
r pfdebug simd on
set simd [r pfcount hll1{t} hll2{t} hll3{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
r pfdebug simd off
r pfmerge hllscalar{t} hll1{t} hll2{t} hll3{t}
r pfdebug simd on
r pfmerge hllsimd{t} hll1{t} hll2{t} hll3{t}
set scalar [r pfcount hllscalar{t}]
set simd [r pfcount hllsimd{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
set scalar [r get hllscalar{t}]
set simd [r get hllsimd{t}]
assert_equal $scalar $simd
} {} {needs:pfdebug}
test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
r del hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 10000} {incr x} {