Prevent VM allocation for already-deleted request-ids

This commit is contained in:
Mahima Singh 2025-12-19 15:31:37 +05:30
parent c24fe28d6d
commit 46e77010f6
9 changed files with 127 additions and 1230 deletions

View file

@ -1107,7 +1107,8 @@ EOT
context 'with no errors during cloning' do
before(:each) do
allow(metrics).to receive(:timing)
expect(metrics).to receive(:timing).with(/clone\./,/0/)
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with(/vmpooler_clone\./,/0/)
expect(provider).to receive(:create_vm).with(pool, String)
allow(provider).to receive(:get_vm_ip_address).and_return(1)
allow(subject).to receive(:get_domain_for_pool).and_return('example.com')
@ -1158,7 +1159,8 @@ EOT
context 'with a failure to get ip address after cloning' do
it 'should log a message that it completed being cloned' do
allow(metrics).to receive(:timing)
expect(metrics).to receive(:timing).with(/clone\./,/0/)
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with(/vmpooler_clone\./,/0/)
expect(provider).to receive(:create_vm).with(pool, String)
allow(provider).to receive(:get_vm_ip_address).and_return(nil)
@ -1217,7 +1219,8 @@ EOT
context 'with request_id' do
before(:each) do
allow(metrics).to receive(:timing)
expect(metrics).to receive(:timing).with(/clone\./,/0/)
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with(/vmpooler_clone\./,/0/)
expect(provider).to receive(:create_vm).with(pool, String)
allow(provider).to receive(:get_vm_ip_address).with(vm,pool).and_return(1)
allow(subject).to receive(:get_dns_plugin_class_name_for_pool).and_return(dns_plugin)
@ -1255,7 +1258,7 @@ EOT
resolv = class_double("Resolv").as_stubbed_const(:transfer_nested_constants => true)
expect(subject).to receive(:generate_and_check_hostname).exactly(3).times.and_return([vm_name, true]) #skip this, make it available all times
expect(resolv).to receive(:getaddress).exactly(3).times.and_return("1.2.3.4")
expect(metrics).to receive(:increment).with("errors.staledns.#{pool}").exactly(3).times
expect(metrics).to receive(:increment).with("vmpooler_errors.staledns.#{pool}").exactly(3).times
expect{subject._clone_vm(pool,provider,dns_plugin)}.to raise_error(/Unable to generate a unique hostname after/)
end
it 'should be successful if DNS does not exist' do
@ -1353,7 +1356,8 @@ EOT
# Checks that _destroy_vm reports a per-pool destroy metric whose value is a String.
# get_vm_usage_labels is stubbed so only the metric call is under test.
# NOTE(review): this block appears to come from a diff rendered without +/- markers:
# the `timing` expectation for "destroy.#{pool}" and the `gauge` expectation for
# "vmpooler_destroy.#{pool}" look like the old and new form of the same assertion.
# Confirm which one the spec file actually contains, and whether the example name
# ("timing metric") still matches it.
it 'should emit a timing metric' do
allow(subject).to receive(:get_vm_usage_labels)
allow(metrics).to receive(:timing)
expect(metrics).to receive(:timing).with("destroy.#{pool}", String)
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with("vmpooler_destroy.#{pool}", String)
subject._destroy_vm(vm,pool,provider,dns_plugin)
end
@ -5174,6 +5178,44 @@ EOT
end
end
# Covers create_ondemand_vms when the on-demand request has already been given the
# status 'failed' before processing starts: the call is expected to log, drop the
# request from the provisioning request queue, and schedule no work.
context 'when request is already marked as failed' do
# presumably "<alias>:<pool>:<count>" — TODO confirm against the request parser
let(:request_string) { "#{pool}:#{pool}:1" }
before(:each) do
redis_connection_pool.with do |redis|
# Seed the request through the spec helpers, then set its status to 'failed'
# so every example below starts from an already-failed request.
create_ondemand_request_for_test(request_id, current_time.to_i, request_string, redis)
set_ondemand_request_status(request_id, 'failed', redis)
end
end
it 'logs that the request is already failed' do
redis_connection_pool.with do |redis|
# The expectation is registered before the call so the log line emitted during
# create_ondemand_vms is the one being matched.
expect(logger).to receive(:log).with('s', "Request '#{request_id}' already marked as failed, skipping VM creation")
subject.create_ondemand_vms(request_id, redis)
end
end
it 'removes the request from provisioning__request queue' do
redis_connection_pool.with do |redis|
subject.create_ondemand_vms(request_id, redis)
# A nil score means request_id is no longer a member of the sorted set.
expect(redis.zscore('vmpooler__provisioning__request', request_id)).to be_nil
end
end
it 'does not create VM tasks' do
redis_connection_pool.with do |redis|
subject.create_ondemand_vms(request_id, redis)
# The creation-task sorted set must stay empty.
expect(redis.zcard('vmpooler__odcreate__task')).to eq(0)
end
end
it 'does not add to provisioning__processing queue' do
redis_connection_pool.with do |redis|
subject.create_ondemand_vms(request_id, redis)
# The request must not have been added to the processing sorted set.
expect(redis.zscore('vmpooler__provisioning__processing', request_id)).to be_nil
end
end
end
context 'with a request that has data' do
let(:request_string) { "#{pool}:#{pool}:1" }
before(:each) do

View file

@ -119,7 +119,7 @@ describe 'Vmpooler::PoolManager - Queue Reliability Features' do
it 'increments DLQ metrics' do
redis_connection_pool.with do |redis_connection|
expect(metrics).to receive(:increment).with('dlq.pending.count')
expect(metrics).to receive(:increment).with('vmpooler_dlq.pending.count')
subject.move_to_dlq(vm, pool, 'pending', error_class, error_message, redis_connection)
end
@ -223,7 +223,7 @@ describe 'Vmpooler::PoolManager - Queue Reliability Features' do
it 'increments purge metrics' do
redis_connection_pool.with do |redis_connection|
expect(metrics).to receive(:increment).with("purge.pending.#{pool}.count")
expect(metrics).to receive(:increment).with("vmpooler_purge.pending.#{pool}.count")
subject.purge_pending_queue(pool, redis_connection)
end
@ -460,35 +460,35 @@ describe 'Vmpooler::PoolManager - Queue Reliability Features' do
# Checks that push_health_metrics, called with the status 'healthy', reports the
# status gauge with the value 0. Other gauge calls are permitted by the blanket allow.
# NOTE(review): the 'health.status' and 'vmpooler_health.status' expectations look like
# the removed and added sides of a diff shown without +/- markers — confirm which
# metric name the spec file actually asserts.
it 'pushes status metric' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('health.status', 0)
expect(metrics).to receive(:gauge).with('vmpooler_health.status', 0)
subject.push_health_metrics(metrics_data, 'healthy')
end
# Checks that push_health_metrics reports the DLQ total size, stuck-VM count and
# orphaned-metadata count as gauges. The expected values (25, 2, 3) presumably come
# from the metrics_data fixture defined outside this view — TODO confirm.
# NOTE(review): the 'health.*' and 'vmpooler_health.*' expectations look like the
# removed and added sides of a diff shown without +/- markers — confirm which set of
# metric names the spec file actually asserts.
it 'pushes error metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('health.dlq.total_size', 25)
expect(metrics).to receive(:gauge).with('health.stuck_vms.count', 2)
expect(metrics).to receive(:gauge).with('health.orphaned_metadata.count', 3)
expect(metrics).to receive(:gauge).with('vmpooler_health.dlq.total_size', 25)
expect(metrics).to receive(:gauge).with('vmpooler_health.stuck_vms.count', 2)
expect(metrics).to receive(:gauge).with('vmpooler_health.orphaned_metadata.count', 3)
subject.push_health_metrics(metrics_data, 'healthy')
end
# Checks that push_health_metrics reports per-pool queue gauges for 'test-pool':
# pending size, oldest age and stuck count, plus ready size. The expected values
# presumably come from the metrics_data fixture defined outside this view — TODO confirm.
# NOTE(review): the 'health.queue.*' and 'vmpooler_health.queue.*' expectations look
# like the removed and added sides of a diff shown without +/- markers — confirm which
# set of metric names the spec file actually asserts.
it 'pushes per-pool queue metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('health.queue.test-pool.pending.size', 10)
expect(metrics).to receive(:gauge).with('health.queue.test-pool.pending.oldest_age', 3600)
expect(metrics).to receive(:gauge).with('health.queue.test-pool.pending.stuck_count', 2)
expect(metrics).to receive(:gauge).with('health.queue.test-pool.ready.size', 50)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.pending.size', 10)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.pending.oldest_age', 3600)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.pending.stuck_count', 2)
expect(metrics).to receive(:gauge).with('vmpooler_health.queue.test-pool.ready.size', 50)
subject.push_health_metrics(metrics_data, 'healthy')
end
# Checks that push_health_metrics reports task gauges: active clone tasks, and active
# and pending on-demand tasks. The expected values (3, 2, 5) presumably come from the
# metrics_data fixture defined outside this view — TODO confirm.
# NOTE(review): the 'health.tasks.*' and 'vmpooler_health.tasks.*' expectations look
# like the removed and added sides of a diff shown without +/- markers — confirm which
# set of metric names the spec file actually asserts.
it 'pushes task metrics' do
allow(metrics).to receive(:gauge)
expect(metrics).to receive(:gauge).with('health.tasks.clone.active', 3)
expect(metrics).to receive(:gauge).with('health.tasks.ondemand.active', 2)
expect(metrics).to receive(:gauge).with('health.tasks.ondemand.pending', 5)
expect(metrics).to receive(:gauge).with('vmpooler_health.tasks.clone.active', 3)
expect(metrics).to receive(:gauge).with('vmpooler_health.tasks.ondemand.active', 2)
expect(metrics).to receive(:gauge).with('vmpooler_health.tasks.ondemand.pending', 5)
subject.push_health_metrics(metrics_data, 'healthy')
end