From 08c2b276fbf8ecb13692449ef9c2cb43aa82ba8e Mon Sep 17 00:00:00 2001 From: Moti Cohen Date: Sun, 22 Dec 2024 14:10:07 +0200 Subject: [PATCH] Optimize dict `no_value` also for even addresses (#13683) This pull request enhances the no_value flag option in the dict implementation, which is used to store keys without associated values. Previously, when a key had an odd memory address and was the only item in a table entry, it could be directly stored as a pointer without requiring an intermediate dictEntry. With this update, the optimization has been extended to also handle keys with even memory addresses in the same manner. --- src/dict.c | 258 ++++++++++++++++++++++++++++++--------------------- src/dict.h | 10 +- src/server.c | 3 +- 3 files changed, 157 insertions(+), 114 deletions(-) diff --git a/src/dict.c b/src/dict.c index 24b1eb80d8..3bfcc70170 100644 --- a/src/dict.c +++ b/src/dict.c @@ -120,14 +120,16 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) { * pointer actually points to. If the least bit is set, it's a key. Otherwise, * the bit pattern of the least 3 significant bits mark the kind of entry. */ -#define ENTRY_PTR_MASK 7 /* 111 */ -#define ENTRY_PTR_NORMAL 0 /* 000 */ -#define ENTRY_PTR_NO_VALUE 2 /* 010 */ +#define ENTRY_PTR_MASK 7 /* 111 */ +#define ENTRY_PTR_NORMAL 0 /* 000 : If a pointer to an entry with value. */ +#define ENTRY_PTR_IS_ODD_KEY 1 /* XX1 : If a pointer to odd key address (must be 1). */ +#define ENTRY_PTR_IS_EVEN_KEY 2 /* 010 : If a pointer to even key address. (must be 2 or 4). */ +#define ENTRY_PTR_NO_VALUE 4 /* 100 : If a pointer to an entry without value. */ /* Returns 1 if the entry pointer is a pointer to a key, rather than to an * allocated entry. Returns 0 otherwise. */ static inline int entryIsKey(const dictEntry *de) { - return (uintptr_t)(void *)de & 1; + return ((uintptr_t)de & (ENTRY_PTR_IS_ODD_KEY | ENTRY_PTR_IS_EVEN_KEY)); } /* Returns 1 if the pointer is actually a pointer to a dictEntry struct. Returns @@ -156,7 +158,6 @@ static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) { } static inline void *decodeMaskedPtr(const dictEntry *de) { - assert(!entryIsKey(de)); return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK); } @@ -327,18 +328,17 @@ static void rehashEntriesInBucketAtIndex(dict *d, uint64_t idx) { h = idx & DICTHT_SIZE_MASK(d->ht_size_exp[1]); } if (d->type->no_value) { - if (d->type->keys_are_odd && !d->ht_table[1][h]) { - /* Destination bucket is empty and we can store the key - * directly without an allocated entry. Free the old entry - * if it's an allocated entry. - * - * TODO: Add a flag 'keys_are_even' and if set, we can use - * this optimization for these dicts too. We can set the LSB - * bit when stored as a dict entry and clear it again when - * we need the key back. */ - assert(entryIsKey(key)); + if (!d->ht_table[1][h]) { + /* The destination bucket is empty, allowing the key to be stored + * directly without allocating a dictEntry. If an old entry was + * previously allocated, free its memory. */ if (!entryIsKey(de)) zfree(decodeMaskedPtr(de)); - de = key; + + if (d->type->keys_are_odd) + de = key; /* ENTRY_PTR_IS_ODD_KEY trivially set by the odd key. */ + else + de = encodeMaskedPtr(key, ENTRY_PTR_IS_EVEN_KEY); + } else if (entryIsKey(de)) { /* We don't have an allocated entry but we need one. */ de = createEntryNoValue(key, d->ht_table[1][h]); @@ -556,16 +556,17 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) { assert(bucket >= &d->ht_table[htidx][0] && bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]); if (d->type->no_value) { - if (d->type->keys_are_odd && !*bucket) { - /* We can store the key directly in the destination bucket without the - * allocated entry. - * - * TODO: Add a flag 'keys_are_even' and if set, we can use this - * optimization for these dicts too. We can set the LSB bit when - * stored as a dict entry and clear it again when we need the key - * back. */ - entry = key; - assert(entryIsKey(entry)); + if (!*bucket) { + /* We can store the key directly in the destination bucket without + * allocating dictEntry. + */ + if (d->type->keys_are_odd) { + entry = key; + assert(entryIsKey(entry)); + /* The flag ENTRY_PTR_IS_ODD_KEY (=0x1) is already aligned with LSB bit */ + } else { + entry = encodeMaskedPtr(key, ENTRY_PTR_IS_EVEN_KEY); + } } else { /* Allocate an entry without value. */ entry = createEntryNoValue(key, *bucket); @@ -908,7 +909,10 @@ double dictIncrDoubleVal(dictEntry *de, double val) { } void *dictGetKey(const dictEntry *de) { - if (entryIsKey(de)) return (void*)de; + /* if entryIsKey() */ + if ((uintptr_t)de & ENTRY_PTR_IS_ODD_KEY) return (void *) de; + if ((uintptr_t)de & ENTRY_PTR_IS_EVEN_KEY) return decodeMaskedPtr(de); + /* Regular entry */ if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key; return de->key; } @@ -1906,7 +1910,7 @@ int dictTest(int argc, char **argv, int flags) { long j; long long start, elapsed; int retval; - dict *dict = dictCreate(&BenchmarkDictType); + dict *d = dictCreate(&BenchmarkDictType); dictEntry* de = NULL; dictEntry* existing = NULL; long count = 0; @@ -1926,12 +1930,12 @@ int dictTest(int argc, char **argv, int flags) { TEST("Add 16 keys and verify dict resize is ok") { dictSetResizeEnabled(DICT_RESIZE_ENABLE); for (j = 0; j < 16; j++) { - retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + retval = dictAdd(d,stringFromLongLong(j),(void*)j); assert(retval == DICT_OK); } - while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); - assert(dictSize(dict) == 16); - assert(dictBuckets(dict) == 16); + while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000); + assert(dictSize(d) == 16); + assert(dictBuckets(d) == 16); } TEST("Use DICT_RESIZE_AVOID to disable the dict resize and pad to (dict_force_resize_ratio * 16)") { @@ -1940,112 +1944,112 @@ int dictTest(int argc, char **argv, int flags) { * dict_force_resize_ratio in next test. */ dictSetResizeEnabled(DICT_RESIZE_AVOID); for (j = 16; j < (long)dict_force_resize_ratio * 16; j++) { - retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + retval = dictAdd(d,stringFromLongLong(j),(void*)j); assert(retval == DICT_OK); } current_dict_used = dict_force_resize_ratio * 16; - assert(dictSize(dict) == current_dict_used); - assert(dictBuckets(dict) == 16); + assert(dictSize(d) == current_dict_used); + assert(dictBuckets(d) == 16); } TEST("Add one more key, trigger the dict resize") { - retval = dictAdd(dict,stringFromLongLong(current_dict_used),(void*)(current_dict_used)); + retval = dictAdd(d,stringFromLongLong(current_dict_used),(void*)(current_dict_used)); assert(retval == DICT_OK); current_dict_used++; new_dict_size = 1UL << _dictNextExp(current_dict_used); - assert(dictSize(dict) == current_dict_used); - assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 16); - assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); + assert(dictSize(d) == current_dict_used); + assert(DICTHT_SIZE(d->ht_size_exp[0]) == 16); + assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size); /* Wait for rehashing. */ dictSetResizeEnabled(DICT_RESIZE_ENABLE); - while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); - assert(dictSize(dict) == current_dict_used); - assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); - assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000); + assert(dictSize(d) == current_dict_used); + assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0); } TEST("Delete keys until we can trigger shrink in next test") { /* Delete keys until we can satisfy (1 / HASHTABLE_MIN_FILL) in the next test. */ for (j = new_dict_size / HASHTABLE_MIN_FILL + 1; j < (long)current_dict_used; j++) { char *key = stringFromLongLong(j); - retval = dictDelete(dict, key); + retval = dictDelete(d, key); zfree(key); assert(retval == DICT_OK); } current_dict_used = new_dict_size / HASHTABLE_MIN_FILL + 1; - assert(dictSize(dict) == current_dict_used); - assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); - assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + assert(dictSize(d) == current_dict_used); + assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0); } TEST("Delete one more key, trigger the dict resize") { current_dict_used--; char *key = stringFromLongLong(current_dict_used); - retval = dictDelete(dict, key); + retval = dictDelete(d, key); zfree(key); unsigned long oldDictSize = new_dict_size; new_dict_size = 1UL << _dictNextExp(current_dict_used); assert(retval == DICT_OK); - assert(dictSize(dict) == current_dict_used); - assert(DICTHT_SIZE(dict->ht_size_exp[0]) == oldDictSize); - assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); + assert(dictSize(d) == current_dict_used); + assert(DICTHT_SIZE(d->ht_size_exp[0]) == oldDictSize); + assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size); /* Wait for rehashing. */ - while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); - assert(dictSize(dict) == current_dict_used); - assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); - assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000); + assert(dictSize(d) == current_dict_used); + assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0); } TEST("Empty the dictionary and add 128 keys") { - dictEmpty(dict, NULL); + dictEmpty(d, NULL); for (j = 0; j < 128; j++) { - retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + retval = dictAdd(d,stringFromLongLong(j),(void*)j); assert(retval == DICT_OK); } - while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); - assert(dictSize(dict) == 128); - assert(dictBuckets(dict) == 128); + while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000); + assert(dictSize(d) == 128); + assert(dictBuckets(d) == 128); } TEST("Use DICT_RESIZE_AVOID to disable the dict resize and reduce to 3") { /* Use DICT_RESIZE_AVOID to disable the dict reset, and reduce * the number of keys until we can trigger shrinking in next test. */ dictSetResizeEnabled(DICT_RESIZE_AVOID); - remain_keys = DICTHT_SIZE(dict->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1; + remain_keys = DICTHT_SIZE(d->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1; for (j = remain_keys; j < 128; j++) { char *key = stringFromLongLong(j); - retval = dictDelete(dict, key); + retval = dictDelete(d, key); zfree(key); assert(retval == DICT_OK); } current_dict_used = remain_keys; - assert(dictSize(dict) == remain_keys); - assert(dictBuckets(dict) == 128); + assert(dictSize(d) == remain_keys); + assert(dictBuckets(d) == 128); } TEST("Delete one more key, trigger the dict resize") { current_dict_used--; char *key = stringFromLongLong(current_dict_used); - retval = dictDelete(dict, key); + retval = dictDelete(d, key); zfree(key); new_dict_size = 1UL << _dictNextExp(current_dict_used); assert(retval == DICT_OK); - assert(dictSize(dict) == current_dict_used); - assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128); - assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); + assert(dictSize(d) == current_dict_used); + assert(DICTHT_SIZE(d->ht_size_exp[0]) == 128); + assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size); /* Wait for rehashing. */ dictSetResizeEnabled(DICT_RESIZE_ENABLE); - while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); - assert(dictSize(dict) == current_dict_used); - assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); - assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000); + assert(dictSize(d) == current_dict_used); + assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0); } TEST("Restore to original state") { - dictEmpty(dict, NULL); + dictEmpty(d, NULL); dictSetResizeEnabled(DICT_RESIZE_ENABLE); } srand(12345); @@ -2055,61 +2059,61 @@ int dictTest(int argc, char **argv, int flags) { char *key = stringFromSubstring(); /* Insert the range directly from the large string */ - de = dictAddRaw(dict, key, &existing); + de = dictAddRaw(d, key, &existing); assert(de != NULL || existing != NULL); /* If key already exists NULL is returned so we need to free the temp key string */ if (de == NULL) zfree(key); } end_benchmark("Inserting random substrings (100-500B) from large string with symbols"); - assert((long)dictSize(dict) <= count); - dictEmpty(dict, NULL); + assert((long)dictSize(d) <= count); + dictEmpty(d, NULL); start_benchmark(); for (j = 0; j < count; j++) { - retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + retval = dictAdd(d,stringFromLongLong(j),(void*)j); assert(retval == DICT_OK); } end_benchmark("Inserting via dictAdd() non existing"); - assert((long)dictSize(dict) == count); + assert((long)dictSize(d) == count); - dictEmpty(dict, NULL); + dictEmpty(d, NULL); start_benchmark(); for (j = 0; j < count; j++) { - de = dictAddRaw(dict,stringFromLongLong(j),NULL); + de = dictAddRaw(d,stringFromLongLong(j),NULL); assert(de != NULL); } end_benchmark("Inserting via dictAddRaw() non existing"); - assert((long)dictSize(dict) == count); + assert((long)dictSize(d) == count); start_benchmark(); for (j = 0; j < count; j++) { void *key = stringFromLongLong(j); - de = dictAddRaw(dict,key,&existing); + de = dictAddRaw(d,key,&existing); assert(existing != NULL); zfree(key); } end_benchmark("Inserting via dictAddRaw() existing (no insertion)"); - assert((long)dictSize(dict) == count); + assert((long)dictSize(d) == count); - dictEmpty(dict, NULL); + dictEmpty(d, NULL); start_benchmark(); for (j = 0; j < count; j++) { void *key = stringFromLongLong(j); - const uint64_t hash = dictGetHash(dict, key); - de = dictAddNonExistsByHash(dict,key,hash); + const uint64_t hash = dictGetHash(d, key); + de = dictAddNonExistsByHash(d,key,hash); assert(de != NULL); } end_benchmark("Inserting via dictAddNonExistsByHash() non existing"); - assert((long)dictSize(dict) == count); + assert((long)dictSize(d) == count); /* Wait for rehashing. */ - while (dictIsRehashing(dict)) { - dictRehashMicroseconds(dict,100*1000); + while (dictIsRehashing(d)) { + dictRehashMicroseconds(d,100*1000); } - dictEmpty(dict, NULL); + dictEmpty(d, NULL); start_benchmark(); for (j = 0; j < count; j++) { @@ -2117,41 +2121,41 @@ int dictTest(int argc, char **argv, int flags) { void *key = stringFromLongLong(j); /* Check if the key exists */ - dictEntry *entry = dictFind(dict, key); + dictEntry *entry = dictFind(d, key); assert(entry == NULL); /* Add the key */ - dictEntry *de = dictAddRaw(dict, key, NULL); + dictEntry *de = dictAddRaw(d, key, NULL); assert(de != NULL); } end_benchmark("Find() and inserting via dictFind()+dictAddRaw() non existing"); - dictEmpty(dict, NULL); + dictEmpty(d, NULL); start_benchmark(); for (j = 0; j < count; j++) { /* Create a key */ void *key = stringFromLongLong(j); - uint64_t hash = dictGetHash(dict, key); + uint64_t hash = dictGetHash(d, key); - /* Check if the key exists */ - dictEntry *entry = dictFindByHash(dict, key, hash); + /* Check if the key exists */ + dictEntry *entry = dictFindByHash(d, key, hash); assert(entry == NULL); - de = dictAddNonExistsByHash(dict, key, hash); + de = dictAddNonExistsByHash(d, key, hash); assert(de != NULL); } end_benchmark("Find() and inserting via dictGetHash()+dictFindByHash()+dictAddNonExistsByHash() non existing"); - assert((long)dictSize(dict) == count); + assert((long)dictSize(d) == count); /* Wait for rehashing. */ - while (dictIsRehashing(dict)) { - dictRehashMicroseconds(dict,100*1000); + while (dictIsRehashing(d)) { + dictRehashMicroseconds(d,100*1000); } start_benchmark(); for (j = 0; j < count; j++) { char *key = stringFromLongLong(j); - dictEntry *de = dictFind(dict,key); + dictEntry *de = dictFind(d,key); assert(de != NULL); zfree(key); } @@ -2160,7 +2164,7 @@ int dictTest(int argc, char **argv, int flags) { start_benchmark(); for (j = 0; j < count; j++) { char *key = stringFromLongLong(j); - dictEntry *de = dictFind(dict,key); + dictEntry *de = dictFind(d,key); assert(de != NULL); zfree(key); } @@ -2169,7 +2173,7 @@ int dictTest(int argc, char **argv, int flags) { start_benchmark(); for (j = 0; j < count; j++) { char *key = stringFromLongLong(rand() % count); - dictEntry *de = dictFind(dict,key); + dictEntry *de = dictFind(d,key); assert(de != NULL); zfree(key); } @@ -2177,7 +2181,7 @@ int dictTest(int argc, char **argv, int flags) { start_benchmark(); for (j = 0; j < count; j++) { - dictEntry *de = dictGetRandomKey(dict); + dictEntry *de = dictGetRandomKey(d); assert(de != NULL); } end_benchmark("Accessing random keys"); @@ -2186,7 +2190,7 @@ int dictTest(int argc, char **argv, int flags) { for (j = 0; j < count; j++) { char *key = stringFromLongLong(rand() % count); key[0] = 'X'; - dictEntry *de = dictFind(dict,key); + dictEntry *de = dictFind(d,key); assert(de == NULL); zfree(key); } @@ -2195,14 +2199,52 @@ int dictTest(int argc, char **argv, int flags) { start_benchmark(); for (j = 0; j < count; j++) { char *key = stringFromLongLong(j); - retval = dictDelete(dict,key); + retval = dictDelete(d,key); assert(retval == DICT_OK); key[0] += 17; /* Change first number to letter. */ - retval = dictAdd(dict,key,(void*)j); + retval = dictAdd(d,key,(void*)j); assert(retval == DICT_OK); } end_benchmark("Removing and adding"); - dictRelease(dict); + dictRelease(d); + + TEST("Use dict without values (no_value=1)") { + dictType dt = BenchmarkDictType; + dt.no_value = 1; + + /* Allocate array of size count and fill it with keys (stringFromLongLong(j) */ + char **lookupKeys = zmalloc(sizeof(char*) * count); + for (long j = 0; j < count; j++) + lookupKeys[j] = stringFromLongLong(j); + + + /* Add keys without values. */ + dict *d = dictCreate(&dt); + for (j = 0; j < count; j++) { + retval = dictAdd(d,lookupKeys[j],NULL); + assert(retval == DICT_OK); + } + + /* Now, we should be able to find the keys. */ + for (j = 0; j < count; j++) { + dictEntry *de = dictFind(d,lookupKeys[j]); + assert(de != NULL); + } + + /* Find non exists keys. */ + for (j = 0; j < count; j++) { + /* Temporarily override first char of key */ + char tmp = lookupKeys[j][0]; + lookupKeys[j][0] = 'X'; + dictEntry *de = dictFind(d,lookupKeys[j]); + lookupKeys[j][0] = tmp; + assert(de == NULL); + } + + dictRelease(d); + zfree(lookupKeys); + } + return 0; } #endif diff --git a/src/dict.h b/src/dict.h index bcc207c476..12a5c99185 100644 --- a/src/dict.h +++ b/src/dict.h @@ -53,12 +53,12 @@ typedef struct dictType { /* Flags */ /* The 'no_value' flag, if set, indicates that values are not used, i.e. the * dict is a set. When this flag is set, it's not possible to access the - * value of a dictEntry and it's also impossible to use dictSetKey(). Entry - * metadata can also not be used. */ + * value of a dictEntry and it's also impossible to use dictSetKey(). It + * enables an optimization to store a key directly without an allocating + * dictEntry in between, if it is the only key in the bucket. */ unsigned int no_value:1; - /* If no_value = 1 and all keys are odd (LSB=1), setting keys_are_odd = 1 - * enables one more optimization: to store a key without an allocated - * dictEntry. */ + /* This flag is required for `no_value` optimization since the optimization + * reuses LSB bits as metadata */ unsigned int keys_are_odd:1; /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that * flag is set. */ diff --git a/src/server.c b/src/server.c index 973b020010..4b729fedea 100644 --- a/src/server.c +++ b/src/server.c @@ -636,7 +636,8 @@ dictType clientDictType = { NULL, /* key dup */ NULL, /* val dup */ dictClientKeyCompare, /* key compare */ - .no_value = 1 /* no values in this dict */ + .no_value = 1, /* no values in this dict */ + .keys_are_odd = 0 /* a client pointer is not an odd pointer */ }; /* This function is called once a background process of some kind terminates,