Introduce DEBUG_DEFRAG compilation option to allow running tests with activedefrag when the allocator is not jemalloc (#14326)

This PR is based on https://github.com/valkey-io/valkey/pull/1303

This PR introduces a DEBUG_DEFRAG compilation option that enables
activedefrag functionality even when the allocator is not jemalloc, and
always forces defragmentation regardless of the amount or ratio of
fragmentation.

## Using
```
make SANITIZER=address DEBUG_DEFRAG=<force|fully>
./runtest --debug-defrag
```

* DEBUG_DEFRAG=force
   * Ignore the threshold for defragmentation to ensure that
defragmentation is always triggered.
   * Always reallocate pointers to probe for correctness issues in pointer
reallocation.

* DEBUG_DEFRAG=fully
   * Includes everything in the option `force`.
   * Additionally performs a full defrag on every defrag cycle, which is
significantly slower but more accurate.

---------

Co-authored-by: Ran Shidlansik <ranshid@amazon.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: oranagra <oran@redislabs.com>
This commit is contained in:
debing.sun
2025-09-10 12:52:20 +08:00
committed by GitHub
parent d649637ebd
commit 60adba48aa
20 changed files with 135 additions and 22 deletions

View File

@@ -11,7 +11,7 @@ on:
inputs:
skipjobs:
description: 'jobs to skip (delete the ones you wanna keep, do not leave empty)'
default: 'valgrind,sanitizer,tls,freebsd,macos,alpine,32bit,iothreads,ubuntu,centos,malloc,specific,fortify,reply-schema,oldTC'
default: 'valgrind,sanitizer,tls,freebsd,macos,alpine,32bit,iothreads,ubuntu,centos,malloc,specific,fortify,reply-schema,oldTC,defrag'
skiptests:
description: 'tests to skip (delete the ones you wanna keep, do not leave empty)'
default: 'redis,modules,sentinel,cluster,unittest'
@@ -1223,3 +1223,31 @@ jobs:
if: true && !contains(github.event.inputs.skiptests, 'cluster')
run: |
./runtest-cluster ${{github.event.inputs.cluster_test_args}}
# Builds with AddressSanitizer and DEBUG_DEFRAG=force, then runs the main test
# suite with --debug-defrag. Skipped when 'defrag' is listed in the skipjobs input.
test-sanitizer-force-defrag:
runs-on: ubuntu-latest
if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
!contains(github.event.inputs.skipjobs, 'defrag')
timeout-minutes: 14400
steps:
- name: prep
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV
echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV
echo "skipjobs: ${{github.event.inputs.skipjobs}}"
echo "skiptests: ${{github.event.inputs.skiptests}}"
echo "test_args: ${{github.event.inputs.test_args}}"
echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}"
- uses: actions/checkout@v4
with:
repository: ${{ env.GITHUB_REPOSITORY }}
ref: ${{ env.GITHUB_HEAD_REF }}
- name: make
run: make SANITIZER=address DEBUG_DEFRAG=force REDIS_CFLAGS='-Werror'
- name: testprep
run: sudo apt-get install tcl8.6 tclx
- name: test
if: true && !contains(github.event.inputs.skiptests, 'redis')
run: ./runtest --debug-defrag --verbose --clients 1 ${{github.event.inputs.test_args}}

View File

@@ -137,6 +137,13 @@ endif
endif
endif
# Special case of forcing defrag to run even though we have no Jemalloc support.
#   DEBUG_DEFRAG=force  defines DEBUG_DEFRAG_FORCE
#   DEBUG_DEFRAG=fully  defines DEBUG_DEFRAG_FORCE and DEBUG_DEFRAG_FULLY
# Any other value (or leaving it unset) adds no flags.
ifeq ($(DEBUG_DEFRAG), force)
CFLAGS +=-DDEBUG_DEFRAG_FORCE
else ifeq ($(DEBUG_DEFRAG), fully)
CFLAGS +=-DDEBUG_DEFRAG_FORCE -DDEBUG_DEFRAG_FULLY
endif
# Override default settings if possible
-include .make-settings

View File

@@ -141,6 +141,7 @@ typedef struct {
* pointers are worthwhile moving and which aren't */
int je_get_defrag_hint(void* ptr);
#if !defined(DEBUG_DEFRAG_FORCE)
/* Defrag helper for generic allocations without freeing old pointer.
*
* Note: The caller is responsible for freeing the old pointer if this function
@@ -188,6 +189,37 @@ void activeDefragFreeRaw(void *ptr) {
activeDefragFree(ptr);
server.stat_active_defrag_hits++;
}
#else
/* DEBUG_DEFRAG_FORCE variant: unconditionally duplicate the allocation.
 *
 * A new buffer with the same usable size as 'ptr' is obtained from zmalloc()
 * and the full usable contents of 'ptr' are copied into it. Every call is
 * counted in server.stat_active_defrag_hits.
 *
 * The old pointer is NOT released here; the caller remains responsible for
 * freeing it. Returns the newly allocated copy. */
void *activeDefragAllocWithoutFree(void *ptr) {
    size_t usable = zmalloc_usable_size(ptr);
    void *copy = zmalloc(usable);

    memcpy(copy, ptr, usable);
    server.stat_active_defrag_hits++;
    return copy;
}
/* DEBUG_DEFRAG_FORCE variant: release 'ptr' by passing it straight to
 * zfree(). No statistics are updated here. */
void activeDefragFree(void *ptr) {
zfree(ptr);
}
/* DEBUG_DEFRAG_FORCE variant: move an allocation to a fresh buffer.
 *
 * Copies 'ptr' via activeDefragAllocWithoutFree() and, when a new pointer was
 * produced, releases the old one with activeDefragFree().
 *
 * Returns the new pointer (or whatever falsy value the copy helper returned,
 * in which case 'ptr' is left untouched). */
void *activeDefragAlloc(void *ptr) {
    void *moved = activeDefragAllocWithoutFree(ptr);

    if (!moved) return moved;

    activeDefragFree(ptr);
    return moved;
}
/* DEBUG_DEFRAG_FORCE variant: allocate 'size' bytes with zmalloc() and return
 * the result. No statistics are updated here. */
void *activeDefragAllocRaw(size_t size) {
return zmalloc(size);
}
/* DEBUG_DEFRAG_FORCE variant: free 'ptr' with zfree() and count the call as
 * one hit in server.stat_active_defrag_hits. */
void activeDefragFreeRaw(void *ptr) {
zfree(ptr);
server.stat_active_defrag_hits++;
}
#endif
/* Defrag helper for sds strings
*
@@ -1009,6 +1041,7 @@ static void dbKeysScanCallback(void *privdata, const dictEntry *de, dictEntryLin
server.stat_active_defrag_scanned++;
}
#if !defined(DEBUG_DEFRAG_FORCE)
/* Utility function to get the fragmentation ratio from jemalloc.
* It is critical to do that by comparing only heap maps that belong to
* jemalloc, and skip ones the jemalloc keeps as spare. Since we use this
@@ -1042,6 +1075,13 @@ float getAllocatorFragmentation(size_t *out_frag_bytes) {
allocated, active, resident, frag_pct, rss_pct, frag_smallbins_bytes, rss_bytes);
return frag_pct;
}
#else
/* DEBUG_DEFRAG_FORCE variant: report a fixed, worst-case fragmentation.
 *
 * No allocator statistics are consulted. When 'out_frag_bytes' is non-NULL it
 * is set to SIZE_MAX. The return value is always 99, used here as the maximum
 * fragmentation percentage. */
float getAllocatorFragmentation(size_t *out_frag_bytes) {
    if (out_frag_bytes != NULL) {
        *out_frag_bytes = SIZE_MAX;
    }
    return 99; /* The maximum percentage of fragmentation */
}
#endif
/* Defrag scan callback for the pubsub dictionary. */
void defragPubsubScanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) {
@@ -1585,6 +1625,9 @@ static int activeDefragTimeProc(struct aeEventLoop *eventLoop, long long id, voi
monotime starttime = getMonotonicUs();
int dutyCycleUs = computeDefragCycleUs();
#if defined(DEBUG_DEFRAG_FULLY)
dutyCycleUs = 30*1000*1000LL; /* 30 seconds */
#endif
monotime endtime = starttime + dutyCycleUs;
int haveMoreWork = 1;

View File

@@ -239,7 +239,7 @@ void zfree_with_flags(void *ptr, int flags) {
/* Allocation and free functions that bypass the thread cache
* and go straight to the allocator arena bins.
* Currently implemented only for jemalloc. Used for online defragmentation. */
#ifdef HAVE_DEFRAG
#if (defined(USE_JEMALLOC) && defined(HAVE_DEFRAG))
void *zmalloc_no_tcache(size_t size) {
if (size >= SIZE_MAX/2) zmalloc_oom_handler(size);
void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE);

View File

@@ -76,7 +76,7 @@
/* We can enable the Redis defrag capabilities only if we are using Jemalloc
* and the version used is our special version modified for Redis having
* the ability to return per-allocation fragmentation hints. */
#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT)
#if (defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT)) || defined(DEBUG_DEFRAG_FORCE)
#define HAVE_DEFRAG
#endif
@@ -127,7 +127,7 @@ void *zrealloc_with_flags(void *ptr, size_t size, int flags);
void zfree_with_flags(void *ptr, int flags);
#endif
#ifdef HAVE_DEFRAG
#if (defined(USE_JEMALLOC) && defined(HAVE_DEFRAG))
void zfree_no_tcache(void *ptr);
__attribute__((malloc)) void *zmalloc_no_tcache(size_t size);
#endif

View File

@@ -1,4 +1,4 @@
start_server {tags {"repl external:skip"}} {
start_server {tags {"repl external:skip debug_defrag:skip"}} {
start_server {} {
test {First server should have role slave after SLAVEOF} {
r -1 slaveof [srv 0 host] [srv 0 port]

View File

@@ -117,7 +117,7 @@ start_server {} {
# become smaller when master disconnects with slow replicas since output buffer
# limit is reached.
foreach rdbchannel {"yes" "no"} {
start_server {tags {"repl external:skip"}} {
start_server {tags {"repl external:skip debug_defrag:skip"}} {
start_server {} {
start_server {} {
set replica1 [srv -2 client]

View File

@@ -227,7 +227,7 @@ start_server {tags {"repl external:skip"}} {
}
}
start_server {tags {"repl external:skip"}} {
start_server {tags {"repl external:skip debug_defrag:skip"}} {
set replica [srv 0 client]
start_server {} {
@@ -401,7 +401,7 @@ start_server {tags {"repl external:skip"}} {
}
}
start_server {tags {"repl external:skip"}} {
start_server {tags {"repl external:skip debug_defrag:skip"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]

View File

@@ -322,7 +322,7 @@ start_server {tags {"repl external:skip"}} {
foreach mdl {no yes} rdbchannel {no yes} {
foreach sdl {disabled swapdb} {
start_server {tags {"repl external:skip"} overrides {save {}}} {
start_server {tags {"repl external:skip debug_defrag:skip"} overrides {save {}}} {
set master [srv 0 client]
$master config set repl-diskless-sync $mdl
$master config set repl-diskless-sync-delay 5
@@ -1539,7 +1539,7 @@ foreach disklessload {disabled on-empty-db} {
catch {$replica shutdown nosave}
}
}
} {} {repl external:skip}
} {} {repl external:skip debug_defrag:skip}
}
start_server {tags {"repl external:skip"} overrides {save {}}} {

View File

@@ -1,3 +1,17 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of (a) the Redis Source Available License 2.0
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
# GNU Affero General Public License v3 (AGPLv3).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
set ::global_overrides {}
set ::tags {}
set ::valgrind_errors {}
@@ -222,6 +236,11 @@ proc tags_acceptable {tags err_return} {
return 0
}
if {$::debug_defrag && [lsearch $tags "debug_defrag:skip"] >= 0} {
set err "Not supported on server compiled with DEBUG_DEFRAG option"
return 0
}
if {$::singledb && [lsearch $tags "singledb:skip"] >= 0} {
set err "Not supported on singledb"
return 0
@@ -558,6 +577,12 @@ proc start_server {options {code undefined}} {
dict set config "client-default-resp" "3"
}
if {$::debug_defrag} {
dict set config "activedefrag" "yes" ;# defrag enabled
dict set config "active-defrag-cycle-min" "65"
dict set config "active-defrag-cycle-max" "75"
}
# write new configuration to temporary file
set config_file [tmpfile redis.conf]
create_server_config_file $config_file $config $config_lines

View File

@@ -3,9 +3,15 @@
# Copyright (C) 2014-Present, Redis Ltd.
# All Rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of (a) the Redis Source Available License 2.0
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
# GNU Affero General Public License v3 (AGPLv3).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
package require Tcl 8.5
@@ -83,6 +89,7 @@ set ::ignoredigest 0
set ::large_memory 0
set ::log_req_res 0
set ::force_resp3 0
set ::debug_defrag 0
# Set to 1 when we are running in client mode. The Redis test uses a
# server-client model to run tests simultaneously. The server instance
@@ -577,6 +584,7 @@ proc print_help_screen {} {
"--ignore-encoding Don't validate object encoding."
"--ignore-digest Don't use debug digest validations."
"--large-memory Run tests using over 100mb."
"--debug-defrag Indicate the test is running against server compiled with DEBUG_DEFRAG option"
"--help Print this help screen."
} "\n"]
}
@@ -708,6 +716,8 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
set ::ignoreencoding 1
} elseif {$opt eq {--ignore-digest}} {
set ::ignoredigest 1
} elseif {$opt eq {--debug-defrag}} {
set ::debug_defrag 1
} elseif {$opt eq {--help}} {
print_help_screen
exit 0

View File

@@ -59,7 +59,7 @@ start_server {tags {"aofrw external:skip logreqres:skip"} overrides {save {}}} {
}
}
start_server {tags {"aofrw external:skip"} overrides {aof-use-rdb-preamble no}} {
start_server {tags {"aofrw external:skip debug_defrag:skip"} overrides {aof-use-rdb-preamble no}} {
test {Turning off AOF kills the background writing child if any} {
r config set appendonly yes
waitForBgrewriteaof r

View File

@@ -331,7 +331,7 @@ start_server {tags {"info" "external:skip"}} {
if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
assert_morethan $cmd_sum2 $cmd_sum1
assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
}
} {} {debug_defrag:skip}
test {stats: instantaneous metrics} {
r config resetstat
@@ -359,7 +359,7 @@ start_server {tags {"info" "external:skip"}} {
if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_duration_usec: $value" }
assert_morethan $value 0
assert_lessthan $value [expr $retries*22000] ;# default hz is 10, so duration < 1000 / 10, allow some tolerance
}
} {} {debug_defrag:skip}
test {stats: debug metrics} {
# make sure debug info is hidden

View File

@@ -1004,13 +1004,13 @@ run_solo {defrag} {
}
}
}
} {} {defrag external:skip tsan:skip cluster}
} {} {defrag external:skip tsan:skip debug_defrag:skip cluster}
start_cluster 1 0 {tags {"defrag external:skip tsan:skip cluster"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save "" loglevel notice}} {
start_cluster 1 0 {tags {"defrag external:skip tsan:skip debug_defrag:skip cluster"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save "" loglevel notice}} {
test_active_defrag "cluster"
}
start_server {tags {"defrag external:skip tsan:skip standalone"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save "" loglevel notice}} {
start_server {tags {"defrag external:skip tsan:skip debug_defrag:skip standalone"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save "" loglevel notice}} {
test_active_defrag "standalone"
}
} ;# run_solo

View File

@@ -1,6 +1,6 @@
set testmodule [file normalize tests/modules/defragtest.so]
start_server {tags {"modules external:skip"} overrides {{save ""}}} {
start_server {tags {"modules external:skip debug_defrag:skip"} overrides {{save ""}}} {
r module load $testmodule
r config set hz 100
r config set active-defrag-ignore-bytes 1

View File

@@ -1,6 +1,6 @@
set testmodule [file normalize tests/modules/rdbloadsave.so]
start_server {tags {"modules external:skip"}} {
start_server {tags {"modules external:skip debug_defrag:skip"}} {
r module load $testmodule
test "Module rdbloadsave sanity" {

View File

@@ -2003,7 +2003,7 @@ start_server {tags {"scripting"}} {
} else {
assert_lessthan [s used_memory_vm_functions] 14500000
}
}
} {} {debug_defrag:skip}
}
} ;# foreach is_eval

View File

@@ -96,7 +96,7 @@ start_server {tags {"incr"}} {
assert {[string range $old 0 2] eq "at:"}
assert {[string range $new 0 2] eq "at:"}
assert {$old eq $new}
} {} {needs:debug}
} {} {needs:debug debug_defrag:skip}
test {INCRBYFLOAT against non existing key} {
r del novar

View File

@@ -1132,7 +1132,7 @@ foreach type {single multiple single_multiple} {
r config set save $origin_save
r config set set-max-listpack-entries $origin_max_lp
r config set rdb-key-save-delay $origin_save_delay
} {OK} {needs:debug slow}
} {OK} {needs:debug slow debug_defrag:skip}
proc setup_move {} {
r del myset3{t} myset4{t}

View File

@@ -100,7 +100,7 @@ start_server {tags {"string"}} {
assert_equal 1 [r setnx x 20]
assert_equal 20 [r get x]
}
} {} {debug_defrag:skip}
test "GETEX EX option" {
r del foo