(POOLER-170) Revise vmpooler usage stats

Break down the usage stats into smaller groups so as to manage the
number of stat lines collected for Prometheus.

This may need some further revision to filter out Litmus stats, or
otherwise collect Litmus usage information.
This commit is contained in:
John O'Connor 2020-06-16 17:11:33 +01:00
parent 72564de4b4
commit b6dcd77228
4 changed files with 126 additions and 67 deletions

View file

@ -43,21 +43,29 @@ module Vmpooler
},
param_labels: %i[template_name]
},
usage: {
user: {
mtype: M_COUNTER,
docstring: 'Number of Pool Instances of this created',
prom_metric_prefix: "#{@metrics_prefix}_usage",
docstring: 'Number of Pool Instances this user created created',
prom_metric_prefix: "#{@metrics_prefix}_user",
param_labels: %i[user poolname]
},
user: {
# This metric leads to a lot of label values, which is likely to challenge data storage
# on Prometheus - see Best Practices: https://prometheus.io/docs/practices/naming/#labels
# So it is likely that this metric may need to be simplified or broken into a number
# of smaller metrics to capture the detail without challenging Prometheus
usage_jenkins_instance: {
mtype: M_COUNTER,
docstring: 'vmpooler user counters',
prom_metric_prefix: "#{@metrics_prefix}_user",
param_labels: %i[user instancex value_stream branch project job_name component_to_test poolname]
docstring: 'Pools by Jenkins Instance usage',
prom_metric_prefix: "#{@metrics_prefix}_usage_jenkins_instance",
param_labels: %i[jenkins_instance value_stream poolname]
},
usage_branch_project: {
mtype: M_COUNTER,
docstring: 'Pools by branch/project usage',
prom_metric_prefix: "#{@metrics_prefix}_usage_branch_project",
param_labels: %i[branch project poolname]
},
usage_job_component: {
mtype: M_COUNTER,
docstring: 'Pools by job/component usage',
prom_metric_prefix: "#{@metrics_prefix}_usage_job_component",
param_labels: %i[job_name component_to_test poolname]
},
checkout: {
mtype: M_COUNTER,

View file

@ -491,15 +491,16 @@ module Vmpooler
return if checkout.nil?
user ||= 'unauthenticated'
unless jenkins_build_url
user = user.gsub('.', '_')
$metrics.increment("usage.#{user}.#{poolname}")
return
end
user = user.gsub('.', '_')
$metrics.increment("user.#{user}.#{poolname}")
return unless jenkins_build_url
# TBD - Add Filter for Litmus here as well - to ignore for the moment.
url_parts = jenkins_build_url.split('/')[2..-1]
instance = url_parts[0]
jenkins_instance = url_parts[0].gsub('.', '_')
value_stream_parts = url_parts[2].split('_')
value_stream_parts = value_stream_parts.map { |s| s.gsub('.', '_') }
value_stream = value_stream_parts.shift
branch = value_stream_parts.pop
project = value_stream_parts.shift
@ -507,22 +508,9 @@ module Vmpooler
build_metadata_parts = url_parts[3]
component_to_test = component_to_test('RMM_COMPONENT_TO_TEST_NAME', build_metadata_parts)
metric_parts = [
'usage',
user,
instance,
value_stream,
branch,
project,
job_name,
component_to_test,
poolname
]
metric_parts = metric_parts.reject(&:nil?)
metric_parts = metric_parts.map { |s| s.gsub('.', '_') }
$metrics.increment(metric_parts.join('.'))
$metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}")
$metrics.increment("usage_branch_project.#{branch}.#{project}.#{poolname}")
$metrics.increment("usage_job_component.#{job_name}.#{component_to_test}.#{poolname}")
rescue StandardError => e
$logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
raise
@ -537,7 +525,7 @@ module Vmpooler
next if value.nil?
return value if key == match
end
nil
'none'
end
def purge_unused_vms_and_folders