From c093283b1b27008078d77ac8bf8ffb2a3814dcd8 Mon Sep 17 00:00:00 2001 From: sashaodessa <140454972+sashaodessa@users.noreply.github.com> Date: Fri, 21 Nov 2025 22:39:24 +0100 Subject: [PATCH] Replace fixed sleep delays with active polling in prometheus service test (#15828) ## **Description:** **What type of PR is this?** > Bug fix **What does this PR do? Why is it needed?** Replaces fixed `time.Sleep(time.Second)` delays in `TestLifecycle` with active polling to wait for service readiness/shutdown. This improves test reliability and reduces execution time by eliminating unnecessary waits when services start/stop faster than expected. **Which issue(s) does this PR fix?** N/A - Minor test improvement **Other notes for review** - Uses a 50ms polling interval with a 3s timeout for both startup and shutdown checks - Maintains the same test logic while making it more efficient and less flaky - No functional changes to the service itself **Acknowledgements** - [x] I have read [CONTRIBUTING.md](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md). - [ ] I have included a uniquely named [changelog fragment file](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md#maintaining-changelogmd). - [x] I have added a description to this PR with sufficient context for reviewers to understand this PR. --- changelog/sashaodessa_patch-1.md | 3 +++ monitoring/prometheus/service_test.go | 31 +++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 changelog/sashaodessa_patch-1.md diff --git a/changelog/sashaodessa_patch-1.md b/changelog/sashaodessa_patch-1.md new file mode 100644 index 0000000000..75533ae012 --- /dev/null +++ b/changelog/sashaodessa_patch-1.md @@ -0,0 +1,3 @@ +### Ignored + +- Replace fixed sleep delays with active polling in prometheus service test to improve test reliability. 
diff --git a/monitoring/prometheus/service_test.go b/monitoring/prometheus/service_test.go index 00f85b4750..da2a37c8f1 100644 --- a/monitoring/prometheus/service_test.go +++ b/monitoring/prometheus/service_test.go @@ -26,8 +26,21 @@ func TestLifecycle(t *testing.T) { port := 1000 + rand.Intn(1000) prometheusService := NewService(t.Context(), fmt.Sprintf(":%d", port), nil) prometheusService.Start() - // Give service time to start. - time.Sleep(time.Second) + // Actively wait until the service responds on /metrics (faster and less flaky than a fixed sleep) + deadline := time.Now().Add(3 * time.Second) + for { + if time.Now().After(deadline) { + t.Fatalf("metrics endpoint not ready within timeout") + } + resp, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", port)) + if err == nil { + _ = resp.Body.Close() + if resp.StatusCode == http.StatusOK { + break + } + } + time.Sleep(50 * time.Millisecond) + } // Query the service to ensure it really started. resp, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", port)) @@ -36,8 +49,18 @@ func TestLifecycle(t *testing.T) { err = prometheusService.Stop() require.NoError(t, err) - // Give service time to stop. - time.Sleep(time.Second) + // Actively wait until the service stops responding on /metrics + deadline = time.Now().Add(3 * time.Second) + for { + if time.Now().After(deadline) { + t.Fatalf("metrics endpoint still reachable after timeout") + } + _, err = http.Get(fmt.Sprintf("http://localhost:%d/metrics", port)) + if err != nil { + break + } + time.Sleep(50 * time.Millisecond) + } // Query the service to ensure it really stopped. _, err = http.Get(fmt.Sprintf("http://localhost:%d/metrics", port))