diff --git a/.gitignore b/.gitignore
index 2524e19c4d..363acfca13 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ src/nodes.conf
 deps/lua/src/lua
 deps/lua/src/luac
 deps/lua/src/liblua.a
+deps/hdr_histogram/libhdrhistogram.a
 tests/tls/*
 .make-*
 .prerequisites
diff --git a/deps/hdr_histogram/Makefile b/deps/hdr_histogram/Makefile
index 83d3760ae8..64f2e547ae 100644
--- a/deps/hdr_histogram/Makefile
+++ b/deps/hdr_histogram/Makefile
@@ -9,12 +9,21 @@ DEBUG= -g
 R_CC=$(CC) $(R_CFLAGS)
 R_LD=$(CC) $(R_LDFLAGS)
 
-hdr_histogram.o: hdr_histogram.h hdr_histogram.c 
+AR= ar
+ARFLAGS= rcs
+
+libhdrhistogram.a: hdr_histogram.o hdr_alloc.o
+	$(AR) $(ARFLAGS) $@ $+
+
+hdr_alloc.o: hdr_alloc.h hdr_alloc.c
+
+hdr_histogram.o: hdr_alloc.o hdr_histogram.h hdr_histogram.c
 
 .c.o:
 	$(R_CC) -c  $< 
 
 clean:
 	rm -f *.o
+	rm -f *.a
 
 
diff --git a/deps/hdr_histogram/hdr_alloc.c b/deps/hdr_histogram/hdr_alloc.c
new file mode 100644
index 0000000000..d2bc611d93
--- /dev/null
+++ b/deps/hdr_histogram/hdr_alloc.c
@@ -0,0 +1,34 @@
+/**
+ * hdr_alloc.c
+ * Written by Filipe Oliveira and released to the public domain,
+ * as explained at http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+#include "hdr_alloc.h"
+#include <stdlib.h>
+
+hdrAllocFuncs hdrAllocFns = {
+    .mallocFn = malloc,
+    .callocFn = calloc,
+    .reallocFn = realloc,
+    .freeFn = free,
+};
+
+/* Override hdr' allocators with ones supplied by the user */
+hdrAllocFuncs hdrSetAllocators(hdrAllocFuncs *override) {
+    hdrAllocFuncs orig = hdrAllocFns;
+
+    hdrAllocFns = *override;
+
+    return orig;
+}
+
+/* Reset allocators to use build time defaults */
+void hdrResetAllocators(void) {
+    hdrAllocFns = (hdrAllocFuncs){
+        .mallocFn = malloc,
+        .callocFn = calloc,
+        .reallocFn = realloc,
+        .freeFn = free,
+    };
+}
diff --git a/deps/hdr_histogram/hdr_alloc.h b/deps/hdr_histogram/hdr_alloc.h
new file mode 100644
index 0000000000..410f640af0
--- /dev/null
+++ b/deps/hdr_histogram/hdr_alloc.h
@@ -0,0 +1,47 @@
+/**
+ * hdr_alloc.h
+ * Written by Filipe Oliveira and released to the public domain,
+ * as explained at http://creativecommons.org/publicdomain/zero/1.0/
+ *
+ * Allocator selection.
+ *
+ * This file is used in order to change the HdrHistogram allocator at run
+ * time. */
+
+#ifndef HDR_ALLOC_H
+#define HDR_ALLOC_H
+
+#include <stddef.h> /* for size_t */
+#include <stdint.h>
+
+/* Structure pointing to our actually configured allocators */
+typedef struct hdrAllocFuncs {
+    void *(*mallocFn)(size_t);
+    void *(*callocFn)(size_t, size_t);
+    void *(*reallocFn)(void *, size_t);
+    void (*freeFn)(void *);
+} hdrAllocFuncs;
+
+/* hdr' configured allocator function pointer struct */
+extern hdrAllocFuncs hdrAllocFns;
+
+hdrAllocFuncs hdrSetAllocators(hdrAllocFuncs *ha);
+void hdrResetAllocators(void);
+
+static inline void *hdr_malloc(size_t size) {
+    return hdrAllocFns.mallocFn(size);
+}
+
+static inline void *hdr_calloc(size_t nmemb, size_t size) {
+    return hdrAllocFns.callocFn(nmemb, size);
+}
+
+static inline void *hdr_realloc(void *ptr, size_t size) {
+    return hdrAllocFns.reallocFn(ptr, size);
+}
+
+static inline void hdr_free(void *ptr) {
+    hdrAllocFns.freeFn(ptr);
+}
+
+#endif /* HDR_ALLOC_H */
diff --git a/deps/hdr_histogram/hdr_histogram.c b/deps/hdr_histogram/hdr_histogram.c
index 5f5286f2f8..42fdcb6875 100644
--- a/deps/hdr_histogram/hdr_histogram.c
+++ b/deps/hdr_histogram/hdr_histogram.c
@@ -15,6 +15,12 @@
 
 #include "hdr_histogram.h"
 #include "hdr_atomic.h"
+#include "hdr_alloc.h"
+
+#define malloc hdr_malloc
+#define calloc hdr_calloc
+#define free hdr_free
+#define realloc hdr_realloc
 
 /*  ######   #######  ##     ## ##    ## ########  ######  */
 /* ##    ## ##     ## ##     ## ###   ##    ##    ##    ## */
diff --git a/deps/hdr_histogram/hdr_histogram.h b/deps/hdr_histogram/hdr_histogram.h
index c26c56b203..11ece24427 100644
--- a/deps/hdr_histogram/hdr_histogram.h
+++ b/deps/hdr_histogram/hdr_histogram.h
@@ -13,7 +13,6 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdio.h>
-
 struct hdr_histogram
 {
     int64_t lowest_trackable_value;
diff --git a/redis.conf b/redis.conf
index 6e4d6233ea..b0f2ac7f19 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1769,6 +1769,20 @@ slowlog-max-len 128
 # "CONFIG SET latency-monitor-threshold <milliseconds>" if needed.
 latency-monitor-threshold 0
 
+################################ LATENCY TRACKING ##############################
+
+# The Redis extended latency monitoring tracks the per command latencies and enables
+# exporting the percentile distribution via the INFO latencystats command,
+# and cumulative latency distributions (histograms) via the LATENCY command.
+#
+# By default, the extended latency monitoring is enabled since the overhead
+# of keeping track of the command latency is very small.
+# latency-tracking yes
+
+# By default the exported latency percentiles via the INFO latencystats command
+# are the p50, p99, and p999.
+# latency-tracking-info-percentiles 50.0 99.0 99.9
+
 ############################# EVENT NOTIFICATION ##############################
 
 # Redis can notify Pub/Sub clients about events happening in the key space.
diff --git a/src/Makefile b/src/Makefile
index a6e4b2098d..9c6eab4f88 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -365,7 +365,7 @@ endif
 
 # redis-server
 $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
-	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a $(FINAL_LIBS)
+	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS)
 
 # redis-sentinel
 $(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
@@ -385,7 +385,7 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
 
 # redis-benchmark
 $(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
-	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/hdr_histogram.o $(FINAL_LIBS)
+	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS)
 
 DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ:%.o=%.d)
 -include $(DEP)
diff --git a/src/blocked.c b/src/blocked.c
index 6a553926b5..3d381691a8 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -111,7 +111,8 @@ void blockClient(client *c, int btype) {
 void updateStatsOnUnblock(client *c, long blocked_us, long reply_us){
     const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us;
     c->lastcmd->microseconds += total_cmd_duration;
-
+    if (server.latency_tracking_enabled)
+        updateCommandLatencyHistogram(&(c->lastcmd->latency_histogram), total_cmd_duration*1000);
     /* Log the command into the Slow log if needed. */
     slowlogPushCurrentCommand(c, c->lastcmd, total_cmd_duration);
     /* Log the reply duration event. */
diff --git a/src/commands.c b/src/commands.c
index 56dd4ac6a9..aee327f5f0 100644
--- a/src/commands.c
+++ b/src/commands.c
@@ -4105,6 +4105,20 @@ struct redisCommandArg LATENCY_GRAPH_Args[] = {
 /* LATENCY HELP hints */
 #define LATENCY_HELP_Hints NULL
 
+/********** LATENCY HISTOGRAM ********************/
+
+/* LATENCY HISTOGRAM history */
+#define LATENCY_HISTOGRAM_History NULL
+
+/* LATENCY HISTOGRAM hints */
+#define LATENCY_HISTOGRAM_Hints NULL
+
+/* LATENCY HISTOGRAM argument table */
+struct redisCommandArg LATENCY_HISTOGRAM_Args[] = {
+{"command",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE},
+{0}
+};
+
 /********** LATENCY HISTORY ********************/
 
 /* LATENCY HISTORY history */
@@ -4146,6 +4160,7 @@ struct redisCommand LATENCY_Subcommands[] = {
 {"doctor","Return a human readable latency analysis report.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_DOCTOR_History,LATENCY_DOCTOR_Hints,latencyCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0},
 {"graph","Return a latency graph for the event.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_GRAPH_History,LATENCY_GRAPH_Hints,latencyCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_GRAPH_Args},
 {"help","Show helpful text about the different subcommands.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_HELP_History,LATENCY_HELP_Hints,latencyCommand,2,CMD_LOADING|CMD_STALE,0},
+{"histogram","Return the cumulative distribution of latencies of a subset of commands or all.","O(N) where N is the number of commands with latency information being retrieved.","7.0.0",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_HISTOGRAM_History,LATENCY_HISTOGRAM_Hints,latencyCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_HISTOGRAM_Args},
 {"history","Return timestamp-latency samples for the event.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_HISTORY_History,LATENCY_HISTORY_Hints,latencyCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_HISTORY_Args},
 {"latest","Return the latest latency samples for all events.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_LATEST_History,LATENCY_LATEST_Hints,latencyCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0},
 {"reset","Reset latency data for one or more events.","O(1)","2.8.13",CMD_DOC_NONE,NULL,NULL,COMMAND_GROUP_SERVER,LATENCY_RESET_History,LATENCY_RESET_Hints,latencyCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,.args=LATENCY_RESET_Args},
diff --git a/src/commands/latency-histogram.json b/src/commands/latency-histogram.json
new file mode 100644
index 0000000000..6ffecd80dc
--- /dev/null
+++ b/src/commands/latency-histogram.json
@@ -0,0 +1,25 @@
+{
+    "HISTOGRAM": {
+        "summary": "Return the cumulative distribution of latencies of a subset of commands or all.",
+        "complexity": "O(N) where N is the number of commands with latency information being retrieved.",
+        "group": "server",
+        "since": "7.0.0",
+        "arity": -2,
+        "container": "LATENCY",
+        "function": "latencyCommand",
+        "command_flags": [
+            "ADMIN",
+            "NOSCRIPT",
+            "LOADING",
+            "STALE"
+        ],
+        "arguments": [
+            {
+                "name": "COMMAND",
+                "type": "string",
+                "optional": true,
+                "multiple": true
+            }
+        ]
+    }
+}
diff --git a/src/config.c b/src/config.c
index 1a73378838..6dd4cc8444 100644
--- a/src/config.c
+++ b/src/config.c
@@ -2660,6 +2660,71 @@ int allowProtectedAction(int config, client *c) {
            (config == PROTECTED_ACTION_ALLOWED_LOCAL && islocalClient(c));
 }
 
+
+static int setConfigLatencyTrackingInfoPercentilesOutputOption(typeData data, sds *argv, int argc, const char **err) {
+    UNUSED(data);
+    zfree(server.latency_tracking_info_percentiles);
+    server.latency_tracking_info_percentiles = NULL;
+    server.latency_tracking_info_percentiles_len = argc;
+
+    /* Special case: treat single arg "" as zero args indicating empty percentile configuration */
+    if (argc == 1 && sdslen(argv[0]) == 0)
+        server.latency_tracking_info_percentiles_len = 0;
+    else
+        server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*argc);
+
+    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+        double percentile;
+        if (!string2d(argv[j], sdslen(argv[j]), &percentile)) {
+            *err = "Invalid latency-tracking-info-percentiles parameters";
+            goto configerr;
+        }
+        if (percentile > 100.0 || percentile < 0.0) {
+            *err = "latency-tracking-info-percentiles parameters should sit between [0.0,100.0]";
+            goto configerr;
+        }
+        server.latency_tracking_info_percentiles[j] = percentile;
+    }
+
+    return 1;
+configerr:
+    zfree(server.latency_tracking_info_percentiles);
+    server.latency_tracking_info_percentiles = NULL;
+    server.latency_tracking_info_percentiles_len = 0;
+    return 0;
+}
+
+static sds getConfigLatencyTrackingInfoPercentilesOutputOption(typeData data) {
+    UNUSED(data);
+    sds buf = sdsempty();
+    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+        buf = sdscatprintf(buf,"%f",
+                        server.latency_tracking_info_percentiles[j]);
+        if (j != server.latency_tracking_info_percentiles_len-1)
+            buf = sdscatlen(buf," ",1);
+    }
+    return buf;
+}
+
+/* Rewrite the latency-tracking-info-percentiles option. */
+void rewriteConfigLatencyTrackingInfoPercentilesOutputOption(typeData data, const char *name, struct rewriteConfigState *state) {
+    UNUSED(data);
+    sds line = sdsnew(name);
+    /* Rewrite latency-tracking-info-percentiles parameters,
+     * or an empty 'latency-tracking-info-percentiles ""' line to avoid the
+     * defaults from being used.
+     */
+    if (!server.latency_tracking_info_percentiles_len) {
+        line = sdscat(line," \"\"");
+    } else {
+        for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+            line = sdscatprintf(line," %f",
+                server.latency_tracking_info_percentiles[j]);
+        }
+    }
+    rewriteConfigRewriteLine(state,name,line,1);
+}
+
 standardConfig configs[] = {
     /* Bool configs */
     createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, server.rdb_checksum, 1, NULL, NULL),
@@ -2705,6 +2770,7 @@ standardConfig configs[] = {
     createBoolConfig("disable-thp", NULL, MODIFIABLE_CONFIG, server.disable_thp, 1, NULL, NULL),
     createBoolConfig("cluster-allow-replica-migration", NULL, MODIFIABLE_CONFIG, server.cluster_allow_replica_migration, 1, NULL, NULL),
     createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL),
+    createBoolConfig("latency-tracking", NULL, MODIFIABLE_CONFIG, server.latency_tracking_enabled, 1, NULL, NULL),
     createBoolConfig("aof-disable-auto-gc", NULL, MODIFIABLE_CONFIG, server.aof_disable_auto_gc, 0, NULL, updateAofAutoGCEnabled),
     
     /* String Configs */
@@ -2857,6 +2923,7 @@ standardConfig configs[] = {
     createSpecialConfig("notify-keyspace-events", NULL, MODIFIABLE_CONFIG, setConfigNotifyKeyspaceEventsOption, getConfigNotifyKeyspaceEventsOption, rewriteConfigNotifyKeyspaceEventsOption, NULL),
     createSpecialConfig("bind", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigBindOption, getConfigBindOption, rewriteConfigBindOption, applyBind),
     createSpecialConfig("replicaof", "slaveof", IMMUTABLE_CONFIG | MULTI_ARG_CONFIG, setConfigReplicaOfOption, getConfigReplicaOfOption, rewriteConfigReplicaOfOption, NULL),
+    createSpecialConfig("latency-tracking-info-percentiles", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigLatencyTrackingInfoPercentilesOutputOption, getConfigLatencyTrackingInfoPercentilesOutputOption, rewriteConfigLatencyTrackingInfoPercentilesOutputOption, NULL),
 
     /* NULL Terminator */
     {NULL}
diff --git a/src/latency.c b/src/latency.c
index 5b3476a817..ff8d66069b 100644
--- a/src/latency.c
+++ b/src/latency.c
@@ -490,6 +490,78 @@ sds createLatencyReport(void) {
 
 /* ---------------------- Latency command implementation -------------------- */
 
+/* latencyCommand() helper to produce a map of time buckets,
+ * each representing a latency range,
+ * between 1 nanosecond and roughly 1 second.
+ * Each bucket covers twice the previous bucket's range.
+ * Empty buckets are not printed.
+ * Everything above 1 sec is considered +Inf.
+ * At max there will be log2(1000000000)=30 buckets */
+void fillCommandCDF(client *c, struct hdr_histogram* histogram) {
+    addReplyMapLen(c,2);
+    addReplyBulkCString(c,"calls");
+    addReplyLongLong(c,(long long) histogram->total_count);
+    addReplyBulkCString(c,"histogram_usec");
+    void *replylen = addReplyDeferredLen(c);
+    int samples = 0;
+    struct hdr_iter iter;
+    hdr_iter_log_init(&iter,histogram,1024,2);
+    int64_t previous_count = 0;
+    while (hdr_iter_next(&iter)) {
+        const int64_t micros = iter.highest_equivalent_value / 1000;
+        const int64_t cumulative_count = iter.cumulative_count;
+        if(cumulative_count > previous_count){
+            addReplyLongLong(c,(long long) micros);
+            addReplyLongLong(c,(long long) cumulative_count);
+            samples++;
+        }
+        previous_count = cumulative_count;
+    }
+    setDeferredMapLen(c,replylen,samples);
+}
+
+/* latencyCommand() helper to produce for all commands,
+ * a per command cumulative distribution of latencies. */
+void latencyAllCommandsFillCDF(client *c) {
+    dictIterator *di = dictGetSafeIterator(server.commands);
+    dictEntry *de;
+    struct redisCommand *cmd;
+    void *replylen = addReplyDeferredLen(c);
+    int command_with_data = 0;
+    while((de = dictNext(di)) != NULL) {
+        cmd = (struct redisCommand *) dictGetVal(de);
+        if (!cmd->latency_histogram)
+            continue;
+        addReplyBulkCString(c,cmd->name);
+        fillCommandCDF(c, cmd->latency_histogram);
+        command_with_data++;
+    }
+    dictReleaseIterator(di);
+    setDeferredMapLen(c,replylen,command_with_data);
+}
+
+/* latencyCommand() helper to produce for a specific command set,
+ * a per command cumulative distribution of latencies. */
+void latencySpecificCommandsFillCDF(client *c) {
+    void *replylen = addReplyDeferredLen(c);
+    int command_with_data = 0;
+    for (int j = 2; j < c->argc; j++){
+        struct redisCommand *cmd = dictFetchValue(server.commands, c->argv[j]->ptr);
+        /* If the command does not exist we skip the reply */
+        if (cmd == NULL) {
+            continue;
+        }
+        /* If no latency info we reply with the same format as non empty histograms */
+        if (!cmd->latency_histogram) {
+            continue;
+        }
+        addReplyBulkCString(c,c->argv[j]->ptr);
+        fillCommandCDF(c, cmd->latency_histogram);
+        command_with_data++;
+    }
+    setDeferredMapLen(c,replylen,command_with_data);
+}
+
 /* latencyCommand() helper to produce a time-delay reply for all the samples
  * in memory for the specified time series. */
 void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
@@ -582,6 +654,7 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
  * LATENCY DOCTOR: returns a human readable analysis of instance latency.
  * LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
  * LATENCY RESET: reset data of a specified event or all the data if no event provided.
+ * LATENCY HISTOGRAM: return a cumulative distribution of latencies in the format of an histogram for the specified command names.
  */
 void latencyCommand(client *c) {
     struct latencyTimeSeries *ts;
@@ -628,6 +701,13 @@ void latencyCommand(client *c) {
                 resets += latencyResetEvent(c->argv[j]->ptr);
             addReplyLongLong(c,resets);
         }
+    } else if (!strcasecmp(c->argv[1]->ptr,"histogram") && c->argc >= 2) {
+        /* LATENCY HISTOGRAM*/
+        if (c->argc == 2) {
+            latencyAllCommandsFillCDF(c);
+        } else {
+            latencySpecificCommandsFillCDF(c);
+        }
     } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
         const char *help[] = {
 "DOCTOR",
@@ -641,6 +721,9 @@ void latencyCommand(client *c) {
 "RESET [<event> ...]",
 "    Reset latency data of one or more <event> classes.",
 "    (default: reset all data for all event classes)",
+"HISTOGRAM [COMMAND ...]",
+"    Return a cumulative distribution of latencies in the format of a histogram for the specified command names.",
+"    If no commands are specified then all histograms are replied.",
 NULL
         };
         addReplyHelp(c, help);
diff --git a/src/module.c b/src/module.c
index dcc1641bf9..f1dc1929b7 100644
--- a/src/module.c
+++ b/src/module.c
@@ -9589,6 +9589,10 @@ void moduleUnregisterCommands(struct RedisModule *module) {
                 sdsfree((sds)cmd->summary);
                 sdsfree((sds)cmd->since);
                 sdsfree((sds)cmd->complexity);
+                if (cmd->latency_histogram) {
+                    hdr_close(cmd->latency_histogram);
+                    cmd->latency_histogram = NULL;
+                }
                 zfree(cmd->args);
                 zfree(cmd);
                 zfree(cp);
diff --git a/src/server.c b/src/server.c
index 96e293a244..42b6198d9b 100644
--- a/src/server.c
+++ b/src/server.c
@@ -36,6 +36,7 @@
 #include "atomicvar.h"
 #include "mt19937-64.h"
 #include "functions.h"
+#include "hdr_alloc.h"
 
 #include <time.h>
 #include <signal.h>
@@ -1781,6 +1782,13 @@ void initServerConfig(void) {
     server.page_size = sysconf(_SC_PAGESIZE);
     server.pause_cron = 0;
 
+    server.latency_tracking_enabled = 1;
+    server.latency_tracking_info_percentiles_len = 3;
+    server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*(server.latency_tracking_info_percentiles_len));
+    server.latency_tracking_info_percentiles[0] = 50.0;  /* p50 */
+    server.latency_tracking_info_percentiles[1] = 99.0;  /* p99 */
+    server.latency_tracking_info_percentiles[2] = 99.9;  /* p999 */
+
     unsigned int lruclock = getLRUClock();
     atomicSet(server.lruclock,lruclock);
     resetServerSaveParams();
@@ -1789,6 +1797,15 @@ void initServerConfig(void) {
     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */
     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
 
+    /* Specify the allocation function for the hdr histogram */
+    hdrAllocFuncs hdrallocfn = {
+        .mallocFn = zmalloc,
+        .callocFn = zcalloc_num,
+        .reallocFn = zrealloc,
+        .freeFn = zfree,
+    };
+    hdrSetAllocators(&hdrallocfn);
+
     /* Replication related */
     server.masterhost = NULL;
     server.masterport = 6379;
@@ -2610,6 +2627,10 @@ void populateCommandStructure(struct redisCommand *c) {
     c->key_specs = c->key_specs_static;
     c->key_specs_max = STATIC_KEY_SPECS_NUM;
 
+    /* We start with an unallocated histogram and only allocate memory when a command
+     * has been issued for the first time */
+    c->latency_histogram = NULL;
+
     for (int i = 0; i < STATIC_KEY_SPECS_NUM; i++) {
         if (c->key_specs[i].begin_search_type == KSPEC_BS_INVALID)
             break;
@@ -2683,6 +2704,10 @@ void resetCommandTableStats(dict* commands) {
         c->calls = 0;
         c->rejected_calls = 0;
         c->failed_calls = 0;
+        if(c->latency_histogram) {
+            hdr_close(c->latency_histogram);
+            c->latency_histogram = NULL;
+        }
         if (c->subcommands_dict)
             resetCommandTableStats(c->subcommands_dict);
     }
@@ -2918,6 +2943,19 @@ void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t dur
     slowlogPushEntryIfNeeded(c,argv,argc,duration);
 }
 
+/* This function is called in order to update the total command histogram duration.
+ * The latency unit is nano-seconds.
+ * If needed it will allocate the histogram memory and trim the duration to the upper/lower tracking limits*/
+void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist){
+    if (duration_hist < LATENCY_HISTOGRAM_MIN_VALUE)
+        duration_hist=LATENCY_HISTOGRAM_MIN_VALUE;
+    if (duration_hist>LATENCY_HISTOGRAM_MAX_VALUE)
+        duration_hist=LATENCY_HISTOGRAM_MAX_VALUE;
+    if (*latency_histogram==NULL)
+        hdr_init(LATENCY_HISTOGRAM_MIN_VALUE,LATENCY_HISTOGRAM_MAX_VALUE,LATENCY_HISTOGRAM_PRECISION,latency_histogram);
+    hdr_record_value(*latency_histogram,duration_hist);
+}
+
 /* Handle the alsoPropagate() API to handle commands that want to propagate
  * multiple separated commands. Note that alsoPropagate() is not affected
  * by CLIENT_PREVENT_PROP flag. */
@@ -3104,6 +3142,9 @@ void call(client *c, int flags) {
     if (flags & CMD_CALL_STATS) {
         real_cmd->microseconds += duration;
         real_cmd->calls++;
+        /* If the client is blocked we will handle latency stats when it is unblocked. */
+        if (server.latency_tracking_enabled && !(c->flags & CLIENT_BLOCKED))
+            updateCommandLatencyHistogram(&(real_cmd->latency_histogram), duration*1000);
     }
 
     /* Propagate the command into the AOF and replication link.
@@ -4573,6 +4614,19 @@ void bytesToHuman(char *s, unsigned long long n) {
     }
 }
 
+/* Fill percentile distribution of latencies. */
+sds fillPercentileDistributionLatencies(sds info, const char* histogram_name, struct hdr_histogram* histogram) {
+    info = sdscatfmt(info,"latency_percentiles_usec_%s:",histogram_name);
+    for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
+        info = sdscatprintf(info,"p%f=%.3f", server.latency_tracking_info_percentiles[j],
+            ((double)hdr_value_at_percentile(histogram,server.latency_tracking_info_percentiles[j]))/1000.0f);
+        if (j != server.latency_tracking_info_percentiles_len-1)
+            info = sdscatlen(info,",",1);
+        }
+    info = sdscatprintf(info,"\r\n");
+    return info;
+}
+
 sds getFullCommandName(struct redisCommand *cmd) {
     if (!cmd->parent) {
         return sdsnew(cmd->name);
@@ -5346,6 +5400,27 @@ sds genRedisInfoString(const char *section) {
         raxStop(&ri);
     }
 
+    /* Latency by percentile distribution per command */
+    if (allsections || !strcasecmp(section,"latencystats")) {
+        if (sections++) info = sdscat(info,"\r\n");
+        info = sdscatprintf(info, "# Latencystats\r\n");
+        if (server.latency_tracking_enabled) {
+            struct redisCommand *c;
+            dictEntry *de;
+            dictIterator *di;
+            di = dictGetSafeIterator(server.commands);
+            while((de = dictNext(di)) != NULL) {
+                char *tmpsafe;
+                c = (struct redisCommand *) dictGetVal(de);
+                if (!c->latency_histogram)
+                    continue;
+                info = fillPercentileDistributionLatencies(info,getSafeInfoString(c->name, strlen(c->name), &tmpsafe),c->latency_histogram);
+                if (tmpsafe != NULL) zfree(tmpsafe);
+            }
+            dictReleaseIterator(di);
+        }
+    }
+
     /* Cluster */
     if (allsections || defsections || !strcasecmp(section,"cluster")) {
         if (sections++) info = sdscat(info,"\r\n");
diff --git a/src/server.h b/src/server.h
index 3706cca237..fc16cdcd69 100644
--- a/src/server.h
+++ b/src/server.h
@@ -50,6 +50,7 @@
 #include <sys/socket.h>
 #include <lua.h>
 #include <signal.h>
+#include "hdr_histogram.h"
 
 #ifdef HAVE_LIBSYSTEMD
 #include <systemd/sd-daemon.h>
@@ -571,6 +572,13 @@ typedef enum {
 #define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),redis_unreachable()))
 #define serverPanic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),redis_unreachable()
 
+/* latency histogram per command init settings */
+#define LATENCY_HISTOGRAM_MIN_VALUE 1L        /* >= 1 nanosec */
+#define LATENCY_HISTOGRAM_MAX_VALUE 1000000000L  /* <= 1 secs */
+#define LATENCY_HISTOGRAM_PRECISION 2  /* Maintain a value precision of 2 significant digits across LATENCY_HISTOGRAM_MIN_VALUE and LATENCY_HISTOGRAM_MAX_VALUE range.
+                                        * Value quantization within the range will thus be no larger than 1/100th (or 1%) of any value.
+                                        * The total size per histogram should sit around 40 KiB Bytes. */
+
 /*-----------------------------------------------------------------------------
  * Data types
  *----------------------------------------------------------------------------*/
@@ -1583,6 +1591,9 @@ struct redisServer {
     char *proc_title_template;      /* Process title template format */
     clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
     int pause_cron;                 /* Don't run cron tasks (debug) */
+    int latency_tracking_enabled;   /* 1 if extended latency tracking is enabled, 0 otherwise. */
+    double *latency_tracking_info_percentiles; /* Extended latency tracking info output percentile list configuration. */
+    int latency_tracking_info_percentiles_len;
     /* AOF persistence */
     int aof_enabled;                /* AOF configuration */
     int aof_state;                  /* AOF_(ON|OFF|WAIT_REWRITE) */
@@ -2175,6 +2186,7 @@ struct redisCommand {
                    ACLs. A connection is able to execute a given command if
                    the user associated to the connection has this command
                    bit set in the bitmap of allowed commands. */
+    struct hdr_histogram* latency_histogram; /*points to the command latency command histogram (unit of time nanosecond) */
     keySpec *key_specs;
     keySpec legacy_range_key_spec; /* The legacy (first,last,step) key spec is
                                      * still maintained (if applicable) so that
@@ -2782,6 +2794,7 @@ void preventCommandPropagation(client *c);
 void preventCommandAOF(client *c);
 void preventCommandReplication(client *c);
 void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t duration);
+void updateCommandLatencyHistogram(struct hdr_histogram** latency_histogram, int64_t duration_hist);
 int prepareForShutdown(int flags);
 void replyToClientsBlockedOnShutdown(void);
 int abortShutdown(void);
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 827f23c85d..0dd4c9836c 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -181,6 +181,20 @@ void *ztrycalloc_usable(size_t size, size_t *usable) {
 #endif
 }
 
+/* Allocate memory and zero it or panic.
+ * We need this wrapper to have a calloc compatible signature */
+void *zcalloc_num(size_t num, size_t size) {
+    /* Ensure that the arguments to calloc(), when multiplied, do not wrap.
+     * Division operations are susceptible to divide-by-zero errors so we also check it. */
+    if ((size == 0) || (num > SIZE_MAX/size)) {
+        zmalloc_oom_handler(SIZE_MAX);
+        return NULL;
+    }
+    void *ptr = ztrycalloc_usable(num*size, NULL);
+    if (!ptr) zmalloc_oom_handler(num*size);
+    return ptr;
+}
+
 /* Allocate memory and zero it or panic */
 void *zcalloc(size_t size) {
     void *ptr = ztrycalloc_usable(size, NULL);
diff --git a/src/zmalloc.h b/src/zmalloc.h
index b3e20d77f6..fb448f7f20 100644
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@@ -98,6 +98,7 @@
 
 void *zmalloc(size_t size);
 void *zcalloc(size_t size);
+void *zcalloc_num(size_t num, size_t size);
 void *zrealloc(void *ptr, size_t size);
 void *ztrymalloc(size_t size);
 void *ztrycalloc(size_t size);
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 758e066684..24199077d0 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -631,6 +631,12 @@ proc errorrstat {cmd r} {
     }
 }
 
+proc latencyrstat_percentiles {cmd r} {
+    if {[regexp "\r\nlatency_percentiles_usec_$cmd:(.*?)\r\n" [$r info latencystats] _ value]} {
+        set _ $value
+    }
+}
+
 proc generate_fuzzy_traffic_on_key {key duration} {
     # Commands per type, blocking commands removed
     # TODO: extract these from help.h or elsewhere, and improve to include other types
diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl
index 7e383bec37..74c3a24c9c 100644
--- a/tests/unit/info.tcl
+++ b/tests/unit/info.tcl
@@ -6,9 +6,90 @@ proc errorstat {cmd} {
     return [errorrstat $cmd r]
 }
 
+proc latency_percentiles_usec {cmd} {
+    return [latencyrstat_percentiles $cmd r]
+}
+
 start_server {tags {"info" "external:skip"}} {
     start_server {} {
 
+        test {latencystats: disable/enable} {
+            r config resetstat
+            r CONFIG SET latency-tracking no
+            r set a b
+            assert_match {} [latency_percentiles_usec set]
+            r CONFIG SET latency-tracking yes
+            r set a b
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec set]
+            r config resetstat
+            assert_match {} [latency_percentiles_usec set]
+        }
+
+        test {latencystats: configure percentiles} {
+            r config resetstat
+            assert_match {} [latency_percentiles_usec set]
+            r CONFIG SET latency-tracking yes
+            r SET a b
+            r GET a
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec set]
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec get]
+            r CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"
+            assert_match [r config get latency-tracking-info-percentiles] {latency-tracking-info-percentiles {0.000000 50.000000 100.000000}}
+            assert_match {*p0.000000=*,p50.000000=*,p100.000000=*} [latency_percentiles_usec set]
+            assert_match {*p0.000000=*,p50.000000=*,p100.000000=*} [latency_percentiles_usec get]
+            r config resetstat
+            assert_match {} [latency_percentiles_usec set]
+        }
+
+        test {latencystats: bad configure percentiles} {
+            r config resetstat
+            set configlatencyline [r config get latency-tracking-info-percentiles]
+            catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 a"} e
+            assert_match {ERR CONFIG SET failed*} $e
+            assert_equal [s total_error_replies] 1
+            assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
+            catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 101.0"} e
+            assert_match {ERR CONFIG SET failed*} $e
+            assert_equal [s total_error_replies] 2
+            assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
+            r config resetstat
+            assert_match {} [errorstat ERR]
+        }
+
+        test {latencystats: blocking commands} {
+            r config resetstat
+            r CONFIG SET latency-tracking yes
+            r CONFIG SET latency-tracking-info-percentiles "50.0 99.0 99.9"
+            set rd [redis_deferring_client]
+            r del list1{t}
+
+            $rd blpop list1{t} 0
+            wait_for_blocked_client
+            r lpush list1{t} a
+            assert_equal [$rd read] {list1{t} a}
+            $rd blpop list1{t} 0
+            wait_for_blocked_client
+            r lpush list1{t} b
+            assert_equal [$rd read] {list1{t} b}
+            assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec blpop]
+            $rd close
+        }
+
+        test {latencystats: measure latency} {
+            r config resetstat
+            r CONFIG SET latency-tracking yes
+            r CONFIG SET latency-tracking-info-percentiles "50.0"
+            r DEBUG sleep 0.05
+            r SET k v
+            set latencystatline_debug [latency_percentiles_usec debug]
+            set latencystatline_set [latency_percentiles_usec set]
+            regexp "p50.000000=(.+\..+)" $latencystatline_debug -> p50_debug
+            regexp "p50.000000=(.+\..+)" $latencystatline_set -> p50_set
+            assert {$p50_debug >= 50000}
+            assert {$p50_set >= 0}
+            assert {$p50_debug >= $p50_set}
+        } {} {needs:debug}
+
         test {errorstats: failed call authentication error} {
             r config resetstat
             assert_match {} [errorstat ERR]
diff --git a/tests/unit/latency-monitor.tcl b/tests/unit/latency-monitor.tcl
index a0feba94a2..239b892bad 100644
--- a/tests/unit/latency-monitor.tcl
+++ b/tests/unit/latency-monitor.tcl
@@ -1,8 +1,59 @@
+proc latency_histogram {cmd} {
+    return [lindex [r latency histogram $cmd] 1]
+}
+
 start_server {tags {"latency-monitor needs:latency"}} {
     # Set a threshold high enough to avoid spurious latency events.
     r config set latency-monitor-threshold 200
     r latency reset
 
+    test {LATENCY HISTOGRAM with empty histogram} {
+        r config resetstat
+        assert_match {} [latency_histogram set]
+        assert {[llength [r latency histogram]] == 0}
+    }
+
+    test {LATENCY HISTOGRAM all commands} {
+        r config resetstat
+        r set a b
+        r set c d
+        assert_match {calls 2 histogram_usec *} [latency_histogram set]
+    }
+
+    test {LATENCY HISTOGRAM with a subset of commands} {
+        r config resetstat
+        r set a b
+        r set c d
+        r get a
+        r hset f k v
+        r hgetall f
+        assert_match {calls 2 histogram_usec *} [latency_histogram set]
+        assert_match {calls 1 histogram_usec *} [latency_histogram hset]
+        assert_match {calls 1 histogram_usec *} [latency_histogram hgetall]
+        assert_match {calls 1 histogram_usec *} [latency_histogram get]
+        assert {[llength [r latency histogram]] == 8}
+        assert {[llength [r latency histogram set get]] == 4}
+    }
+
+    test {LATENCY HISTOGRAM command} {
+        r config resetstat
+        r set a b
+        r get a
+        assert {[llength [r latency histogram]] == 4}
+        assert {[llength [r latency histogram set get]] == 4}
+    }
+
+    test {LATENCY HISTOGRAM with wrong command name skips the invalid one} {
+        r config resetstat
+        assert {[llength [r latency histogram blabla]] == 0}
+        assert {[llength [r latency histogram blabla blabla2 set get]] == 0}
+        r set a b
+        r get a
+        assert_match {calls 1 histogram_usec *} [lindex [r latency histogram blabla blabla2 set get] 1]
+        assert_match {calls 1 histogram_usec *} [lindex [r latency histogram blabla blabla2 set get] 3]
+        assert {[string length [r latency histogram blabla set get]] > 0}
+    }
+
     test {Test latency events logging} {
         r debug sleep 0.3
         after 1100
diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl
index 0f724403e0..8b0e5045a0 100644
--- a/tests/unit/maxmemory.tcl
+++ b/tests/unit/maxmemory.tcl
@@ -326,6 +326,10 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
             $master config set client-output-buffer-limit "replica 100000000 100000000 300"
             $master config set repl-backlog-size [expr {10*1024}]
 
+            # disable latency tracking
+            $master config set latency-tracking no
+            $slave config set latency-tracking no
+
             $slave slaveof $master_host $master_port
             wait_for_condition 50 100 {
                 [s 0 master_link_status] eq {up}
@@ -412,6 +416,7 @@ test_slave_buffers "replica buffer don't induce eviction" 100000 100 1 0
 
 start_server {tags {"maxmemory external:skip"}} {
     test {Don't rehash if used memory exceeds maxmemory after rehash} {
+        r config set latency-tracking no
         r config set maxmemory 0
         r config set maxmemory-policy allkeys-random
 
@@ -432,6 +437,7 @@ start_server {tags {"maxmemory external:skip"}} {
 
 start_server {tags {"maxmemory external:skip"}} {
     test {client tracking don't cause eviction feedback loop} {
+        r config set latency-tracking no
         r config set maxmemory 0
         r config set maxmemory-policy allkeys-lru
         r config set maxmemory-eviction-tenacity 100
diff --git a/tests/unit/moduleapi/blockonbackground.tcl b/tests/unit/moduleapi/blockonbackground.tcl
index 79ca521435..14c5b4898f 100644
--- a/tests/unit/moduleapi/blockonbackground.tcl
+++ b/tests/unit/moduleapi/blockonbackground.tcl
@@ -2,6 +2,10 @@ set testmodule [file normalize tests/modules/blockonbackground.so]
 
 source tests/support/util.tcl
 
+proc latency_percentiles_usec {cmd} {
+    return [latencyrstat_percentiles $cmd r]
+}
+
 start_server {tags {"modules"}} {
     r module load $testmodule
 
@@ -16,16 +20,21 @@ start_server {tags {"modules"}} {
             assert_equal [r slowlog len] 0
         }
         r config resetstat
+        r config set latency-tracking yes
+        r config set latency-tracking-info-percentiles "50.0"
         r block.debug 200 10000
         if {!$::no_latency} {
             assert_equal [r slowlog len] 1
         }
 
         set cmdstatline [cmdrstat block.debug r]
+        set latencystatline_debug [latency_percentiles_usec block.debug]
 
-        regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call
+        regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec usec_per_call
+        regexp "p50.000000=(.+\..+)" $latencystatline_debug  -> p50
         assert {$usec >= 100000}
         assert {$usec_per_call >= 100000}
+        assert {$p50 >= 100000}
     }
 
     test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time even in timeout } {
diff --git a/tests/unit/obuf-limits.tcl b/tests/unit/obuf-limits.tcl
index bbb1140d62..4453d4889a 100644
--- a/tests/unit/obuf-limits.tcl
+++ b/tests/unit/obuf-limits.tcl
@@ -115,6 +115,7 @@ start_server {tags {"obuf-limits external:skip"}} {
     }
 
     test {No response for single command if client output buffer hard limit is enforced} {
+        r config set latency-tracking no
         r config set client-output-buffer-limit {normal 100000 0 0}
         # Total size of all items must be more than 100k
         set item [string repeat "x" 1000]