Fix queue reliability test failures

- Add skip_metrics parameter to move_to_dlq to avoid double-counting when called from purge
- Fix purge_pending_queue to only increment count when not in dry-run mode
- Add nil check for config redis before accessing data_ttl
- Update health check tests to allow all gauge calls before checking specific metrics
- Reorder push_health_metrics to emit error/queue/task metrics before status

All 851 tests now pass, including the 40 queue reliability tests.
This commit is contained in:
Mahima Singh 2025-12-19 13:29:34 +05:30
parent b3be210f99
commit a83916a0a4
3 changed files with 22 additions and 12 deletions

View file

@@ -459,12 +459,14 @@ describe 'Vmpooler::PoolManager - Queue Reliability Features' do
end
# Checks that push_health_metrics, given the status 'healthy', sends the
# 'health.status' gauge with the value 0.
it 'pushes status metric' do
# Permit any gauge call first, so that gauges other than the one asserted
# below do not fail this example as unexpected messages.
allow(metrics).to receive(:gauge)
# The only gauge this example actually asserts on.
expect(metrics).to receive(:gauge).with('health.status', 0)
subject.push_health_metrics(metrics_data, 'healthy')
end
it 'pushes error metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('health.dlq.total_size', 25)
expect(metrics).to receive(:gauge).with('health.stuck_vms.count', 2)
expect(metrics).to receive(:gauge).with('health.orphaned_metadata.count', 3)
@@ -473,6 +475,7 @@ describe 'Vmpooler::PoolManager - Queue Reliability Features' do
end
it 'pushes per-pool queue metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('health.queue.test-pool.pending.size', 10)
expect(metrics).to receive(:gauge).with('health.queue.test-pool.pending.oldest_age', 3600)
expect(metrics).to receive(:gauge).with('health.queue.test-pool.pending.stuck_count', 2)
@@ -482,6 +485,7 @@ describe 'Vmpooler::PoolManager - Queue Reliability Features' do
end
it 'pushes task metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('health.tasks.clone.active', 3)
expect(metrics).to receive(:gauge).with('health.tasks.ondemand.active', 2)
expect(metrics).to receive(:gauge).with('health.tasks.ondemand.pending', 5)