Add operation label to user metric and move from manager to api

This adds an "operation" label to the user metrics and moves incrementing from the manager to api, so that the user metrics show when resources are allocated, as well as destroyed. Previously, user metrics were only updated upon destroying a resource.

I think it's better suited to increment the metric as part of the api instead of the pool_manager, because the metric is expected to change exactly when a user successfully checks out or deletes a VM. Doing so in the provider can be problematic, since it can clone VMs before they are actually checked out by a user.
This commit is contained in:
Jake Spain 2021-08-10 10:55:04 -04:00
parent 3b6073933e
commit ca6833d156
No known key found for this signature in database
GPG key ID: BC1C4DA0A085E113
6 changed files with 81 additions and 322 deletions

View file

@ -210,6 +210,7 @@ module Vmpooler
account_for_starting_vm(vmpool, vmname)
vms << [vmpool, vmname, vmtemplate]
metrics.increment("checkout.success.#{vmpool}")
update_user_metrics('allocate', vmname) if Vmpooler::API.settings.config[:config]['usage_stats']
else
failed = true
metrics.increment("checkout.empty.#{requested}")
@ -235,6 +236,47 @@ module Vmpooler
result
end
# Increment the usage counters for an API operation performed on a VM.
#
# Reads the 'tag:jenkins_build_url', 'token:user' and 'template' fields of the
# "vmpooler__vm__<vmname>" hash from the backend and always increments
# "user.<user>.<operation>.<pool>". When a jenkins_build_url tag is present,
# either a single "usage_litmus" counter (the URL contains 'litmus') or the
# "usage_jenkins_instance", "usage_branch_project" and "usage_job_component"
# counters derived from the URL are incremented as well.
#
# @param operation [String] label embedded in every metric name
#   (the callers visible in this file pass 'allocate' or 'destroy')
# @param vmname [String] VM name used to build the backend hash key
# @raise [StandardError] any failure is printed with puts and then re-raised
def update_user_metrics(operation, vmname)
  vm_key = "vmpooler__vm__#{vmname}"
  backend.multi
  %w[tag:jenkins_build_url token:user template].each { |field| backend.hget(vm_key, field) }
  jenkins_build_url, user, poolname = backend.exec

  # Metric names are dot-separated, so dots in the user name become underscores.
  user = user ? user.gsub('.', '_') : 'unauthenticated'
  metrics.increment("user.#{user}.#{operation}.#{poolname}")
  return unless jenkins_build_url

  if jenkins_build_url.include?('litmus')
    # Very simple filter for Litmus jobs - just count them coming through for the moment.
    metrics.increment("usage_litmus.#{user}.#{operation}.#{poolname}")
    return
  end

  # For a "scheme://host/..." URL this skips the scheme and the empty segment after it.
  segments = jenkins_build_url.split('/')[2..-1]
  jenkins_instance = segments[0].gsub('.', '_')

  # The job segment is "<value_stream>_<project>_<job name...>_<branch>".
  value_stream, *job_tokens = segments[2].split('_').map { |token| token.gsub('.', '_') }
  branch = job_tokens.pop
  project = job_tokens.shift
  job_name = job_tokens.join('_')
  component = component_to_test('RMM_COMPONENT_TO_TEST_NAME', segments[3])

  metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{operation}.#{poolname}")
  metrics.increment("usage_branch_project.#{branch}.#{project}.#{operation}.#{poolname}")
  metrics.increment("usage_job_component.#{job_name}.#{component}.#{operation}.#{poolname}")
rescue StandardError => e
  puts 'd', "[!] [#{poolname}] failed while evaluating usage labels on '#{vmname}' with an error: #{e}"
  raise
end
def update_pool_size(payload)
result = { 'ok' => false }
@ -1169,6 +1211,7 @@ module Vmpooler
status 200
result['ok'] = true
metrics.increment('delete.success')
update_user_metrics('destroy', params[:hostname]) if Vmpooler::API.settings.config[:config]['usage_stats']
else
metrics.increment('delete.failed')
end

View file

@ -165,33 +165,33 @@ module Vmpooler
},
user: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Number of pool instances this user created created',
param_labels: %i[user poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by a user',
param_labels: %i[user operation poolname]
},
usage_litmus: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by Litmus job usage',
param_labels: %i[user poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Litmus jobs',
param_labels: %i[user operation poolname]
},
usage_jenkins_instance: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by Jenkins instance usage',
param_labels: %i[jenkins_instance value_stream poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Jenkins instances',
param_labels: %i[jenkins_instance value_stream operation poolname]
},
usage_branch_project: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by branch/project usage',
param_labels: %i[branch project poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Litmus jobs by Jenkins branch/project',
param_labels: %i[branch project operation poolname]
},
usage_job_component: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by job/component usage',
param_labels: %i[job_name component_to_test poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Litmus jobs Jenkins by job/component',
param_labels: %i[job_name component_to_test operation poolname]
},
checkout: {
mtype: M_COUNTER,

View file

@ -473,54 +473,11 @@ module Vmpooler
finish = format('%<time>.2f', time: Time.now - start)
$logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
$metrics.timing("destroy.#{pool}", finish)
get_vm_usage_labels(vm, redis)
end
end
dereference_mutex(vm)
end
# Emit usage counters for a VM, based on fields stored in its redis hash.
#
# Does nothing unless $config[:config]['usage_stats'] is truthy, and nothing
# when the VM hash has no 'checkout' field. Otherwise increments
# "user.<user>.<pool>" and, when a 'tag:jenkins_build_url' field is present,
# either a single "usage_litmus" counter (the URL contains 'litmus') or the
# "usage_jenkins_instance", "usage_branch_project" and "usage_job_component"
# counters derived from the URL.
#
# @param vm [String] VM name used to build the "vmpooler__vm__<vm>" hash key
# @param redis [Object] connection responding to multi, hget and exec
# @raise [StandardError] any failure is logged at level 'd' and then re-raised
def get_vm_usage_labels(vm, redis)
  return unless $config[:config]['usage_stats']

  vm_key = "vmpooler__vm__#{vm}"
  redis.multi
  %w[checkout tag:jenkins_build_url token:user template].each { |field| redis.hget(vm_key, field) }
  checkout, jenkins_build_url, user, poolname = redis.exec
  return if checkout.nil?

  # Metric names are dot-separated, so dots in the user name become underscores.
  user = (user || 'unauthenticated').gsub('.', '_')
  $metrics.increment("user.#{user}.#{poolname}")
  return unless jenkins_build_url

  if jenkins_build_url.include?('litmus')
    # Very simple filter for Litmus jobs - just count them coming through for the moment.
    $metrics.increment("usage_litmus.#{user}.#{poolname}")
    return
  end

  # For a "scheme://host/..." URL this skips the scheme and the empty segment after it.
  segments = jenkins_build_url.split('/')[2..-1]
  jenkins_instance = segments[0].gsub('.', '_')

  # The job segment is "<value_stream>_<project>_<job name...>_<branch>".
  value_stream, *job_tokens = segments[2].split('_').map { |token| token.gsub('.', '_') }
  branch = job_tokens.pop
  project = job_tokens.shift
  job_name = job_tokens.join('_')
  component = component_to_test('RMM_COMPONENT_TO_TEST_NAME', segments[3])

  $metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}")
  $metrics.increment("usage_branch_project.#{branch}.#{project}.#{poolname}")
  $metrics.increment("usage_job_component.#{job_name}.#{component}.#{poolname}")
rescue StandardError => e
  $logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
  raise
end
def component_to_test(match, labels_string)
return if labels_string.nil?

View file

@ -1021,14 +1021,6 @@ EOT
subject._destroy_vm(vm,pool,provider)
end
it 'should check usage labels' do
redis_connection_pool.with do |redis|
expect(subject).to receive(:get_vm_usage_labels).with(vm, redis)
end
subject._destroy_vm(vm,pool,provider)
end
it 'should dereference the mutex' do
expect(subject).to receive(:dereference_mutex)
@ -1070,244 +1062,6 @@ EOT
end
end
describe '#get_vm_usage_labels' do
let(:template) { 'pool1' }
let(:user) { 'vmpuser' }
let(:vm) { 'vm1' }
context 'when label evaluation is disabled' do
  it 'should do nothing' do
    # Without an expectation this example passes regardless of what the method
    # does, so pin the observable behaviour: no metric may be incremented.
    # NOTE(review): assumes config[:config]['usage_stats'] is unset/false by
    # default in this spec (the sibling context enables it explicitly) - confirm.
    expect(metrics).not_to receive(:increment)
    redis_connection_pool.with do |redis|
      subject.get_vm_usage_labels(vm, redis)
    end
  end
end
context 'when label evaluation is enabled' do
before(:each) do
config[:config]['usage_stats'] = true
end
context 'when a VM has not been checked out' do
  before(:each) do
    redis_connection_pool.with do |redis|
      create_ready_vm(template, vm, redis)
    end
  end

  it 'should return' do
    # The previous version stubbed get_vm_usage_labels on the subject itself,
    # so the real method never ran. Exercise the real method instead and check
    # that it returns nil without emitting any metric.
    # NOTE(review): presumably create_ready_vm leaves the 'checkout' field
    # unset - confirm against the spec helpers.
    expect(metrics).not_to receive(:increment)
    redis_connection_pool.with do |redis|
      expect(subject.get_vm_usage_labels(vm, redis)).to be_nil
    end
  end
end
context 'when a VM has been checked out' do
context 'without auth' do
before(:each) do
redis_connection_pool.with do |redis|
create_running_vm(template, vm, redis)
end
end
it 'should emit a metric' do
redis_connection_pool.with do |redis|
expect(metrics).to receive(:increment).with("user.unauthenticated.#{template}")
subject.get_vm_usage_labels(vm, redis)
end
end
end
context 'with auth' do
before(:each) do
redis_connection_pool.with do |redis|
create_running_vm(template, vm, redis, token, user)
end
end
it 'should emit a metric' do
expect(metrics).to receive(:increment).with("user.#{user}.#{template}")
expect(metrics).not_to receive(:increment)
redis_connection_pool.with do |redis|
subject.get_vm_usage_labels(vm, redis)
end
end
context 'with a user with period in name' do
let(:user) { 'test.user'.gsub('.', '_') }
let(:metric_string) { "user.#{user}.#{template}" }
let(:metric_nodes) { metric_string.split('.') }
before(:each) do
redis_connection_pool.with do |redis|
create_running_vm(template, vm, redis)
end
end
it 'should emit a metric with the character replaced' do
expect(metrics).to receive(:increment).with(metric_string)
expect(metrics).not_to receive(:increment)
redis_connection_pool.with do |redis|
subject.get_vm_usage_labels(vm, redis)
end
end
it 'should include three nodes' do
expect(metric_nodes.count).to eq(3)
end
end
context 'with a jenkins_build_url label' do
let(:jenkins_build_url) { 'https://jenkins.example.com/job/enterprise_pe-acceptance-tests_integration-system_pe_full-agent-upgrade_weekend_2018.1.x/LAYOUT=centos6-64mcd-ubuntu1404-32f-64f,LEGACY_AGENT_VERSION=NONE,PLATFORM=NOTUSED,SCM_BRANCH=2018.1.x,UPGRADE_FROM=2018.1.0,UPGRADE_TO_VERSION=NONE,label=beaker/222/' }
let(:url_parts) { jenkins_build_url.split('/')[2..-1] }
let(:instance) { url_parts[0] }
let(:value_stream_parts) { url_parts[2].split('_') }
let(:value_stream) { value_stream_parts.shift }
let(:branch) { value_stream_parts.pop }
let(:project) { value_stream_parts.shift }
let(:job_name) { value_stream_parts.join('_') }
let(:metric_string_1) { "user.#{user}.#{template}" }
let(:metric_string_2) { "usage_jenkins_instance.#{instance.gsub('.', '_')}.#{value_stream.gsub('.', '_')}.#{template}" }
let(:metric_string_3) { "usage_branch_project.#{branch.gsub('.', '_')}.#{project.gsub('.', '_')}.#{template}" }
let(:metric_string_4) { "usage_job_component.#{job_name.gsub('.', '_')}.none.#{template}" }
before(:each) do
redis_connection_pool.with do |redis|
create_tag(vm, 'jenkins_build_url', jenkins_build_url, redis)
end
end
it 'should emit 4 metric withs information from the URL' do
expect(metrics).to receive(:increment).with(metric_string_1)
expect(metrics).to receive(:increment).with(metric_string_2)
expect(metrics).to receive(:increment).with(metric_string_3)
expect(metrics).to receive(:increment).with(metric_string_4)
expect(metrics).not_to receive(:increment)
redis_connection_pool.with do |redis|
subject.get_vm_usage_labels(vm, redis)
end
end
end
context 'with a jenkins_build_url that contains RMM_COMPONENT_TO_TEST_NAME' do
let(:jenkins_build_url) { 'https://jenkins.example.com/job/platform_puppet-agent-extra_puppet-agent-integration-suite_pr/RMM_COMPONENT_TO_TEST_NAME=puppet,SLAVE_LABEL=beaker,TEST_TARGET=redhat7-64a/824/' }
let(:url_parts) { jenkins_build_url.split('/')[2..-1] }
let(:instance) { url_parts[0].gsub('.', '_') }
let(:value_stream_parts) { url_parts[2].split('_') }
let(:value_stream) { value_stream_parts.shift }
let(:branch) { value_stream_parts.pop }
let(:project) { value_stream_parts.shift }
let(:job_name) { value_stream_parts.join('_') }
let(:build_metadata) { url_parts[3] }
let(:build_component) { subject.component_to_test('RMM_COMPONENT_TO_TEST_NAME', build_metadata) }
let(:expected_string) { "usage.#{user}.#{instance}.#{value_stream}.#{branch}.#{project}.#{job_name}.#{build_component}.#{template}" }
let(:metric_nodes) { expected_string.split('.') }
let(:metric_string_1) { "user.#{user}.#{template}" }
let(:metric_string_2) { "usage_jenkins_instance.#{instance.gsub('.', '_')}.#{value_stream.gsub('.', '_')}.#{template}" }
let(:metric_string_3) { "usage_branch_project.#{branch.gsub('.', '_')}.#{project.gsub('.', '_')}.#{template}" }
let(:metric_string_4) { "usage_job_component.#{job_name.gsub('.', '_')}.#{build_component}.#{template}" }
before(:each) do
redis_connection_pool.with do |redis|
create_tag(vm, 'jenkins_build_url', jenkins_build_url, redis)
end
end
it 'should emit 4 metrics with information from the URL' do
expect(metrics).to receive(:increment).with(metric_string_1)
expect(metrics).to receive(:increment).with(metric_string_2)
expect(metrics).to receive(:increment).with(metric_string_3)
expect(metrics).to receive(:increment).with(metric_string_4)
expect(metrics).not_to receive(:increment)
redis_connection_pool.with do |redis|
subject.get_vm_usage_labels(vm, redis)
end
end
it 'should contain exactly nine nodes' do
expect(metric_nodes.count).to eq(9)
end
context 'when there is no matrix job information' do
let(:jenkins_build_url) { 'https://jenkins.example.com/job/platform_puppet-agent-extra_puppet-agent-integration-suite_pr/824/' }
let(:url_parts) { jenkins_build_url.split('/')[2..-1] }
let(:instance) { url_parts[0].gsub('.', '_') }
let(:value_stream_parts) { url_parts[2].split('_') }
let(:value_stream) { value_stream_parts.shift }
let(:branch) { value_stream_parts.pop }
let(:project) { value_stream_parts.shift }
let(:job_name) { value_stream_parts.join('_') }
let(:metric_string_1) { "user.#{user}.#{template}" }
let(:metric_string_2) { "usage_jenkins_instance.#{instance.gsub('.', '_')}.#{value_stream.gsub('.', '_')}.#{template}" }
let(:metric_string_3) { "usage_branch_project.#{branch.gsub('.', '_')}.#{project.gsub('.', '_')}.#{template}" }
let(:metric_string_4) { "usage_job_component.#{job_name.gsub('.', '_')}.none.#{template}" }
before(:each) do
redis_connection_pool.with do |redis|
create_tag(vm, 'jenkins_build_url', jenkins_build_url, redis)
end
end
it 'should emit 4 metrics with information from the URL without a build_component' do
expect(metrics).to receive(:increment).with(metric_string_1)
expect(metrics).to receive(:increment).with(metric_string_2)
expect(metrics).to receive(:increment).with(metric_string_3)
expect(metrics).to receive(:increment).with(metric_string_4)
expect(metrics).not_to receive(:increment)
redis_connection_pool.with do |redis|
subject.get_vm_usage_labels(vm, redis)
end
end
end
end
context 'with a litmus job' do
let(:jenkins_build_url) { 'https://litmus_manual' }
let(:metric_string_1) { "user.#{user}.#{template}" }
let(:metric_string_2) { "usage_litmus.#{user}.#{template}" }
before(:each) do
redis_connection_pool.with do |redis|
create_tag(vm, 'jenkins_build_url', jenkins_build_url, redis)
end
end
it 'should emit 2 metrics with the second indicating a litmus job' do
expect(metrics).to receive(:increment).with(metric_string_1)
expect(metrics).to receive(:increment).with(metric_string_2)
expect(metrics).not_to receive(:increment)
redis_connection_pool.with do |redis|
subject.get_vm_usage_labels(vm, redis)
end
end
end
end
end
end
end
describe '#component_to_test' do
let(:matching_key) { 'LABEL_ONE' }
let(:matching_value) { 'test' }

View file

@ -198,46 +198,51 @@ describe 'prometheus' do
po.get(labels: metric[:labels])
}.by(1)
end
it 'Increments user.#{user}.#{poolname}' do
it 'Increments user.#{user}.#{operation}.#{poolname}' do
user = 'myuser'
operation = 'allocate'
poolname = 'test-pool'
expect { subject.increment("user.#{user}.#{poolname}") }.to change {
metric, po = subject.get("user.#{user}.#{poolname}")
expect { subject.increment("user.#{user}.#{operation}.#{poolname}") }.to change {
metric, po = subject.get("user.#{user}.#{operation}.#{poolname}")
po.get(labels: metric[:labels])
}.by(1)
end
it 'Increments usage_litmus.#{user}.#{poolname}' do
it 'Increments usage_litmus.#{user}.#{operation}.#{poolname}' do
user = 'myuser'
operation = 'allocate'
poolname = 'test-pool'
expect { subject.increment("usage_litmus.#{user}.#{poolname}") }.to change {
metric, po = subject.get("usage_litmus.#{user}.#{poolname}")
expect { subject.increment("usage_litmus.#{user}.#{operation}.#{poolname}") }.to change {
metric, po = subject.get("usage_litmus.#{user}.#{operation}.#{poolname}")
po.get(labels: metric[:labels])
}.by(1)
end
it 'Increments label usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}' do
it 'Increments label usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{operation}.#{poolname}' do
jenkins_instance = 'jenkins_test_instance'
value_stream = 'notional_value'
operation = 'allocate'
poolname = 'test-pool'
expect { subject.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}") }.to change {
metric, po = subject.get("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}")
expect { subject.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{operation}.#{poolname}") }.to change {
metric, po = subject.get("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{operation}.#{poolname}")
po.get(labels: metric[:labels])
}.by(1)
end
it 'Increments label usage_branch_project.#{branch}.#{project}.#{poolname}' do
it 'Increments label usage_branch_project.#{branch}.#{project}.#{operation}.#{poolname}' do
branch = 'treetop'
project = 'test-project'
operation = 'allocate'
poolname = 'test-pool'
expect { subject.increment("usage_branch_project.#{branch}.#{project}.#{poolname}") }.to change {
metric, po = subject.get("usage_branch_project.#{branch}.#{project}.#{poolname}")
expect { subject.increment("usage_branch_project.#{branch}.#{project}.#{operation}.#{poolname}") }.to change {
metric, po = subject.get("usage_branch_project.#{branch}.#{project}.#{operation}.#{poolname}")
po.get(labels: metric[:labels])
}.by(1)
end
it 'Increments label usage_job_component.#{job_name}.#{component_to_test}.#{poolname}' do
it 'Increments label usage_job_component.#{job_name}.#{component_to_test}.#{operation}.#{poolname}' do
job_name = 'a-job'
component_to_test = 'component-name'
operation = 'allocate'
poolname = 'test-pool'
expect { subject.increment("usage_job_component.#{job_name}.#{component_to_test}.#{poolname}") }.to change {
metric, po = subject.get("usage_job_component.#{job_name}.#{component_to_test}.#{poolname}")
expect { subject.increment("usage_job_component.#{job_name}.#{component_to_test}.#{operation}.#{poolname}") }.to change {
metric, po = subject.get("usage_job_component.#{job_name}.#{component_to_test}.#{operation}.#{poolname}")
po.get(labels: metric[:labels])
}.by(1)
end

View file

@ -532,13 +532,13 @@
#
# - usage_stats
# Enable shipping of VM usage stats
# When enabled a metric is emitted when a machine is destroyed. Tags are inspected and used to organize
# When enabled a metric is emitted when a user requested to allocate and destroy a VM. Tags are inspected and used to organize
# shipped metrics if there is a jenkins_build_url tag set for the VM.
# Without the jenkins_build_url tag set the metric will be sent as "usage.$user.$pool_name".
# Without the jenkins_build_url tag set the metric will be sent as "usage.$user.$operation.$pool_name".
# When the jenkins_build_url tag is set the metric will be sent with additional data. Here is an example
# based off of the following URL, and requested by the user ABS;
# https://jenkins.example.com/job/platform_puppet-agent-extra_puppet-agent-integration-suite_pr/RMM_COMPONENT_TO_TEST_NAME=puppet,SLAVE_LABEL=beaker,TEST_TARGET=redhat7-64a/824/
# "usage.$user.$instance.$value_stream.$branch.$project.$job_name.$component_to_test.$pool_name", which translates to
# "usage.$user.$instance.$value_stream.$branch.$project.$job_name.$component_to_test.$operation.$pool_name", which translates to
# "usage.$user.jenkins_example_com.platform.pr.puppet-agent-extra.puppet-agent-integration-suite.puppet.$operation.$pool_name"
# Expects a boolean value
# (optional; default: false)