Add operation label to user metric and move from manager to api

This adds an "operation" label to the user metrics and moves incrementing from the manager to api, so that the user metrics show when resources are allocated, as well as destroyed. Previously, user metrics were only updated upon destroying a resource.

I think its better suited to increment the metric as part of the api instead of the pool_manger, because it's expected to do so when a user successfully checks out or deletes a VM, but can be problematic when doing so in the provider since it can clone VMs before actually being checked out by a user.
This commit is contained in:
Jake Spain 2021-08-10 10:55:04 -04:00
parent 3b6073933e
commit c06cfc28f7
No known key found for this signature in database
GPG key ID: BC1C4DA0A085E113
6 changed files with 80 additions and 322 deletions

View file

@ -210,6 +210,7 @@ module Vmpooler
account_for_starting_vm(vmpool, vmname)
vms << [vmpool, vmname, vmtemplate]
metrics.increment("checkout.success.#{vmpool}")
update_user_metrics('allocate', vmname) if Vmpooler::API.settings.config[:config]['usage_stats']
else
failed = true
metrics.increment("checkout.empty.#{requested}")
@ -235,6 +236,46 @@ module Vmpooler
result
end
def update_user_metrics(operation, vmname)
backend.multi
backend.hget("vmpooler__vm__#{vmname}", 'tag:jenkins_build_url')
backend.hget("vmpooler__vm__#{vmname}", 'token:user')
backend.hget("vmpooler__vm__#{vmname}", 'template')
jenkins_build_url, user, poolname = backend.exec
if user
user = user.gsub('.', '_')
else
user = 'unauthenticated'
end
metrics.increment("user.#{user}.#{operation}.#{poolname}")
if jenkins_build_url
if jenkins_build_url.include? 'litmus'
# Very simple filter for Litmus jobs - just count them coming through for the moment.
metrics.increment("usage_litmus.#{user}.#{operation}.#{poolname}")
return
end
url_parts = jenkins_build_url.split('/')[2..-1]
jenkins_instance = url_parts[0].gsub('.', '_')
value_stream_parts = url_parts[2].split('_')
value_stream_parts = value_stream_parts.map { |s| s.gsub('.', '_') }
value_stream = value_stream_parts.shift
branch = value_stream_parts.pop
project = value_stream_parts.shift
job_name = value_stream_parts.join('_')
build_metadata_parts = url_parts[3]
component_to_test = component_to_test('RMM_COMPONENT_TO_TEST_NAME', build_metadata_parts)
metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{operation}.#{poolname}")
metrics.increment("usage_branch_project.#{branch}.#{project}.#{operation}.#{poolname}")
metrics.increment("usage_job_component.#{job_name}.#{component_to_test}.#{operation}.#{poolname}")
end
rescue StandardError => e
puts 'd', "[!] [#{poolname}] failed while evaluating usage labels on '#{vmname}' with an error: #{e}"
end
def update_pool_size(payload)
result = { 'ok' => false }
@ -1169,6 +1210,7 @@ module Vmpooler
status 200
result['ok'] = true
metrics.increment('delete.success')
update_user_metrics('destroy', params[:hostname]) if Vmpooler::API.settings.config[:config]['usage_stats']
else
metrics.increment('delete.failed')
end

View file

@ -165,33 +165,33 @@ module Vmpooler
},
user: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Number of pool instances this user created created',
param_labels: %i[user poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by a user',
param_labels: %i[user operation poolname]
},
usage_litmus: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by Litmus job usage',
param_labels: %i[user poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Litmus jobs',
param_labels: %i[user operation poolname]
},
usage_jenkins_instance: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by Jenkins instance usage',
param_labels: %i[jenkins_instance value_stream poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Jenkins instances',
param_labels: %i[jenkins_instance value_stream operation poolname]
},
usage_branch_project: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by branch/project usage',
param_labels: %i[branch project poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Jenkins branch/project',
param_labels: %i[branch project operation poolname]
},
usage_job_component: {
mtype: M_COUNTER,
torun: %i[manager],
docstring: 'Pools by job/component usage',
param_labels: %i[job_name component_to_test poolname]
torun: %i[api],
docstring: 'Number of pool instances and the operation performed by Jenkins job/component',
param_labels: %i[job_name component_to_test operation poolname]
},
checkout: {
mtype: M_COUNTER,

View file

@ -473,54 +473,11 @@ module Vmpooler
finish = format('%<time>.2f', time: Time.now - start)
$logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
$metrics.timing("destroy.#{pool}", finish)
get_vm_usage_labels(vm, redis)
end
end
dereference_mutex(vm)
end
def get_vm_usage_labels(vm, redis)
return unless $config[:config]['usage_stats']
redis.multi
redis.hget("vmpooler__vm__#{vm}", 'checkout')
redis.hget("vmpooler__vm__#{vm}", 'tag:jenkins_build_url')
redis.hget("vmpooler__vm__#{vm}", 'token:user')
redis.hget("vmpooler__vm__#{vm}", 'template')
checkout, jenkins_build_url, user, poolname = redis.exec
return if checkout.nil?
user ||= 'unauthenticated'
user = user.gsub('.', '_')
$metrics.increment("user.#{user}.#{poolname}")
return unless jenkins_build_url
if jenkins_build_url.include? 'litmus'
# Very simple filter for Litmus jobs - just count them coming through for the moment.
$metrics.increment("usage_litmus.#{user}.#{poolname}")
return
end
url_parts = jenkins_build_url.split('/')[2..-1]
jenkins_instance = url_parts[0].gsub('.', '_')
value_stream_parts = url_parts[2].split('_')
value_stream_parts = value_stream_parts.map { |s| s.gsub('.', '_') }
value_stream = value_stream_parts.shift
branch = value_stream_parts.pop
project = value_stream_parts.shift
job_name = value_stream_parts.join('_')
build_metadata_parts = url_parts[3]
component_to_test = component_to_test('RMM_COMPONENT_TO_TEST_NAME', build_metadata_parts)
$metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}")
$metrics.increment("usage_branch_project.#{branch}.#{project}.#{poolname}")
$metrics.increment("usage_job_component.#{job_name}.#{component_to_test}.#{poolname}")
rescue StandardError => e
$logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
raise
end
def component_to_test(match, labels_string)
return if labels_string.nil?