diff --git a/.travis.yml b/.travis.yml index b4a51b0..4469cd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,16 +5,13 @@ os: linux jobs: include: - - rvm: 2.4.9 + - rvm: 2.5.8 env: "CHECK=rubocop" - - rvm: 2.4.9 + - rvm: 2.5.8 env: "CHECK=test" - - rvm: 2.5.7 - env: "CHECK=test" - - - rvm: jruby-9.2.9.0 + - rvm: jruby-9.2.12.0 env: "CHECK=test" script: diff --git a/bin/vmpooler b/bin/vmpooler index 139d79c..4d8af45 100755 --- a/bin/vmpooler +++ b/bin/vmpooler @@ -9,6 +9,7 @@ redis_port = config[:redis]['port'] redis_password = config[:redis]['password'] redis_connection_pool_size = config[:redis]['connection_pool_size'] redis_connection_pool_timeout = config[:redis]['connection_pool_timeout'] +redis_reconnect_attempts = config[:redis]['reconnect_attempts'] logger_file = config[:config]['logfile'] logger = Vmpooler::Logger.new logger_file @@ -43,7 +44,7 @@ if torun.include? :manager Vmpooler::PoolManager.new( config, logger, - Vmpooler.redis_connection_pool(redis_host, redis_port, redis_password, redis_connection_pool_size, redis_connection_pool_timeout, metrics), + Vmpooler.redis_connection_pool(redis_host, redis_port, redis_password, redis_connection_pool_size, redis_connection_pool_timeout, metrics, redis_reconnect_attempts), metrics ).execute! end diff --git a/lib/vmpooler.rb b/lib/vmpooler.rb index 276bfa7..d6e5522 100644 --- a/lib/vmpooler.rb +++ b/lib/vmpooler.rb @@ -58,59 +58,58 @@ module Vmpooler end # Set some configuration defaults - parsed_config[:config]['task_limit'] = string_to_int(ENV['TASK_LIMIT']) || parsed_config[:config]['task_limit'] || 10 - parsed_config[:config]['ondemand_clone_limit'] = string_to_int(ENV['ONDEMAND_CLONE_LIMIT']) || parsed_config[:config]['ondemand_clone_limit'] || 10 + parsed_config[:config]['task_limit'] = string_to_int(ENV['TASK_LIMIT']) || parsed_config[:config]['task_limit'] || 10 + parsed_config[:config]['ondemand_clone_limit'] = string_to_int(ENV['ONDEMAND_CLONE_LIMIT']) || parsed_config[:config]['ondemand_clone_limit'] || 10 parsed_config[:config]['max_ondemand_instances_per_request'] = string_to_int(ENV['MAX_ONDEMAND_INSTANCES_PER_REQUEST']) || parsed_config[:config]['max_ondemand_instances_per_request'] || 10 - parsed_config[:config]['migration_limit'] = string_to_int(ENV['MIGRATION_LIMIT']) if ENV['MIGRATION_LIMIT'] - parsed_config[:config]['vm_checktime'] = string_to_int(ENV['VM_CHECKTIME']) || parsed_config[:config]['vm_checktime'] || 1 - parsed_config[:config]['vm_lifetime'] = string_to_int(ENV['VM_LIFETIME']) || parsed_config[:config]['vm_lifetime'] || 24 - parsed_config[:config]['max_lifetime_upper_limit'] = string_to_int(ENV['MAX_LIFETIME_UPPER_LIMIT']) || parsed_config[:config]['max_lifetime_upper_limit'] - parsed_config[:config]['ready_ttl'] = string_to_int(ENV['READY_TTL']) || parsed_config[:config]['ready_ttl'] || 60 - parsed_config[:config]['ondemand_request_ttl'] = string_to_int(ENV['ONDEMAND_REQUEST_TTL']) || parsed_config[:config]['ondemand_request_ttl'] || 5 - parsed_config[:config]['prefix'] = ENV['PREFIX'] || parsed_config[:config]['prefix'] || '' - - parsed_config[:config]['logfile'] = ENV['LOGFILE'] if ENV['LOGFILE'] - - parsed_config[:config]['site_name'] = ENV['SITE_NAME'] if ENV['SITE_NAME'] - parsed_config[:config]['domain'] = ENV['DOMAIN'] if ENV['DOMAIN'] - parsed_config[:config]['clone_target'] = ENV['CLONE_TARGET'] if ENV['CLONE_TARGET'] - parsed_config[:config]['timeout'] = string_to_int(ENV['TIMEOUT']) if ENV['TIMEOUT'] - parsed_config[:config]['vm_lifetime_auth'] = string_to_int(ENV['VM_LIFETIME_AUTH']) if ENV['VM_LIFETIME_AUTH'] - parsed_config[:config]['max_tries'] = string_to_int(ENV['MAX_TRIES']) if ENV['MAX_TRIES'] - parsed_config[:config]['retry_factor'] = string_to_int(ENV['RETRY_FACTOR']) if ENV['RETRY_FACTOR'] - parsed_config[:config]['create_folders'] = true?(ENV['CREATE_FOLDERS']) if ENV['CREATE_FOLDERS'] - parsed_config[:config]['create_template_delta_disks'] = ENV['CREATE_TEMPLATE_DELTA_DISKS'] if ENV['CREATE_TEMPLATE_DELTA_DISKS'] + parsed_config[:config]['migration_limit'] = string_to_int(ENV['MIGRATION_LIMIT']) if ENV['MIGRATION_LIMIT'] + parsed_config[:config]['vm_checktime'] = string_to_int(ENV['VM_CHECKTIME']) || parsed_config[:config]['vm_checktime'] || 1 + parsed_config[:config]['vm_lifetime'] = string_to_int(ENV['VM_LIFETIME']) || parsed_config[:config]['vm_lifetime'] || 24 + parsed_config[:config]['max_lifetime_upper_limit'] = string_to_int(ENV['MAX_LIFETIME_UPPER_LIMIT']) || parsed_config[:config]['max_lifetime_upper_limit'] + parsed_config[:config]['ready_ttl'] = string_to_int(ENV['READY_TTL']) || parsed_config[:config]['ready_ttl'] || 60 + parsed_config[:config]['ondemand_request_ttl'] = string_to_int(ENV['ONDEMAND_REQUEST_TTL']) || parsed_config[:config]['ondemand_request_ttl'] || 5 + parsed_config[:config]['prefix'] = ENV['PREFIX'] || parsed_config[:config]['prefix'] || '' + parsed_config[:config]['logfile'] = ENV['LOGFILE'] if ENV['LOGFILE'] + parsed_config[:config]['site_name'] = ENV['SITE_NAME'] if ENV['SITE_NAME'] + parsed_config[:config]['domain'] = ENV['DOMAIN'] if ENV['DOMAIN'] + parsed_config[:config]['clone_target'] = ENV['CLONE_TARGET'] if ENV['CLONE_TARGET'] + parsed_config[:config]['timeout'] = string_to_int(ENV['TIMEOUT']) if ENV['TIMEOUT'] + parsed_config[:config]['vm_lifetime_auth'] = string_to_int(ENV['VM_LIFETIME_AUTH']) if ENV['VM_LIFETIME_AUTH'] + parsed_config[:config]['max_tries'] = string_to_int(ENV['MAX_TRIES']) if ENV['MAX_TRIES'] + parsed_config[:config]['retry_factor'] = string_to_int(ENV['RETRY_FACTOR']) if ENV['RETRY_FACTOR'] + parsed_config[:config]['create_folders'] = true?(ENV['CREATE_FOLDERS']) if ENV['CREATE_FOLDERS'] + parsed_config[:config]['experimental_features'] = ENV['EXPERIMENTAL_FEATURES'] if ENV['EXPERIMENTAL_FEATURES'] + parsed_config[:config]['purge_unconfigured_folders'] = ENV['PURGE_UNCONFIGURED_FOLDERS'] if ENV['PURGE_UNCONFIGURED_FOLDERS'] + parsed_config[:config]['usage_stats'] = ENV['USAGE_STATS'] if ENV['USAGE_STATS'] + parsed_config[:config]['request_logger'] = ENV['REQUEST_LOGGER'] if ENV['REQUEST_LOGGER'] + parsed_config[:config]['create_template_delta_disks'] = ENV['CREATE_TEMPLATE_DELTA_DISKS'] if ENV['CREATE_TEMPLATE_DELTA_DISKS'] set_linked_clone(parsed_config) - parsed_config[:config]['experimental_features'] = ENV['EXPERIMENTAL_FEATURES'] if ENV['EXPERIMENTAL_FEATURES'] - parsed_config[:config]['purge_unconfigured_folders'] = ENV['PURGE_UNCONFIGURED_FOLDERS'] if ENV['PURGE_UNCONFIGURED_FOLDERS'] - parsed_config[:config]['usage_stats'] = ENV['USAGE_STATS'] if ENV['USAGE_STATS'] - parsed_config[:config]['request_logger'] = ENV['REQUEST_LOGGER'] if ENV['REQUEST_LOGGER'] - parsed_config[:redis] = parsed_config[:redis] || {} - parsed_config[:redis]['server'] = ENV['REDIS_SERVER'] || parsed_config[:redis]['server'] || 'localhost' - parsed_config[:redis]['port'] = string_to_int(ENV['REDIS_PORT']) if ENV['REDIS_PORT'] - parsed_config[:redis]['password'] = ENV['REDIS_PASSWORD'] if ENV['REDIS_PASSWORD'] - parsed_config[:redis]['data_ttl'] = string_to_int(ENV['REDIS_DATA_TTL']) || parsed_config[:redis]['data_ttl'] || 168 - parsed_config[:redis]['connection_pool_size'] = string_to_int(ENV['REDIS_CONNECTION_POOL_SIZE']) || parsed_config[:redis]['connection_pool_size'] || 10 + parsed_config[:redis] = parsed_config[:redis] || {} + parsed_config[:redis]['server'] = ENV['REDIS_SERVER'] || parsed_config[:redis]['server'] || 'localhost' + parsed_config[:redis]['port'] = string_to_int(ENV['REDIS_PORT']) if ENV['REDIS_PORT'] + parsed_config[:redis]['password'] = ENV['REDIS_PASSWORD'] if ENV['REDIS_PASSWORD'] + parsed_config[:redis]['data_ttl'] = string_to_int(ENV['REDIS_DATA_TTL']) || parsed_config[:redis]['data_ttl'] || 168 + parsed_config[:redis]['connection_pool_size'] = string_to_int(ENV['REDIS_CONNECTION_POOL_SIZE']) || parsed_config[:redis]['connection_pool_size'] || 10 parsed_config[:redis]['connection_pool_timeout'] = string_to_int(ENV['REDIS_CONNECTION_POOL_TIMEOUT']) || parsed_config[:redis]['connection_pool_timeout'] || 5 + parsed_config[:redis]['reconnect_attempts'] = string_to_int(ENV['REDIS_RECONNECT_ATTEMPTS']) || parsed_config[:redis]['reconnect_attempts'] || 10 - parsed_config[:statsd] = parsed_config[:statsd] || {} if ENV['STATSD_SERVER'] + parsed_config[:statsd] = parsed_config[:statsd] || {} if ENV['STATSD_SERVER'] parsed_config[:statsd]['server'] = ENV['STATSD_SERVER'] if ENV['STATSD_SERVER'] parsed_config[:statsd]['prefix'] = ENV['STATSD_PREFIX'] if ENV['STATSD_PREFIX'] - parsed_config[:statsd]['port'] = string_to_int(ENV['STATSD_PORT']) if ENV['STATSD_PORT'] + parsed_config[:statsd]['port'] = string_to_int(ENV['STATSD_PORT']) if ENV['STATSD_PORT'] - parsed_config[:graphite] = parsed_config[:graphite] || {} if ENV['GRAPHITE_SERVER'] + parsed_config[:graphite] = parsed_config[:graphite] || {} if ENV['GRAPHITE_SERVER'] parsed_config[:graphite]['server'] = ENV['GRAPHITE_SERVER'] if ENV['GRAPHITE_SERVER'] parsed_config[:graphite]['prefix'] = ENV['GRAPHITE_PREFIX'] if ENV['GRAPHITE_PREFIX'] - parsed_config[:graphite]['port'] = string_to_int(ENV['GRAPHITE_PORT']) if ENV['GRAPHITE_PORT'] + parsed_config[:graphite]['port'] = string_to_int(ENV['GRAPHITE_PORT']) if ENV['GRAPHITE_PORT'] parsed_config[:auth] = parsed_config[:auth] || {} if ENV['AUTH_PROVIDER'] if parsed_config.key? :auth - parsed_config[:auth]['provider'] = ENV['AUTH_PROVIDER'] if ENV['AUTH_PROVIDER'] - parsed_config[:auth][:ldap] = parsed_config[:auth][:ldap] || {} if parsed_config[:auth]['provider'] == 'ldap' - parsed_config[:auth][:ldap]['host'] = ENV['LDAP_HOST'] if ENV['LDAP_HOST'] - parsed_config[:auth][:ldap]['port'] = string_to_int(ENV['LDAP_PORT']) if ENV['LDAP_PORT'] - parsed_config[:auth][:ldap]['base'] = ENV['LDAP_BASE'] if ENV['LDAP_BASE'] + parsed_config[:auth]['provider'] = ENV['AUTH_PROVIDER'] if ENV['AUTH_PROVIDER'] + parsed_config[:auth][:ldap] = parsed_config[:auth][:ldap] || {} if parsed_config[:auth]['provider'] == 'ldap' + parsed_config[:auth][:ldap]['host'] = ENV['LDAP_HOST'] if ENV['LDAP_HOST'] + parsed_config[:auth][:ldap]['port'] = string_to_int(ENV['LDAP_PORT']) if ENV['LDAP_PORT'] + parsed_config[:auth][:ldap]['base'] = ENV['LDAP_BASE'] if ENV['LDAP_BASE'] parsed_config[:auth][:ldap]['user_object'] = ENV['LDAP_USER_OBJECT'] if ENV['LDAP_USER_OBJECT'] end @@ -164,7 +163,7 @@ module Vmpooler pools end - def self.redis_connection_pool(host, port, password, size, timeout, metrics) + def self.redis_connection_pool(host, port, password, size, timeout, metrics, redis_reconnect_attempts = 0) Vmpooler::PoolManager::GenericConnectionPool.new( metrics: metrics, connpool_type: 'redis_connection_pool', @@ -173,13 +172,14 @@ module Vmpooler timeout: timeout ) do connection = Concurrent::Hash.new - redis = new_redis(host, port, password) + redis = new_redis(host, port, password, redis_reconnect_attempts) connection['connection'] = redis end end - def self.new_redis(host = 'localhost', port = nil, password = nil) - Redis.new(host: host, port: port, password: password) + def self.new_redis(host = 'localhost', port = nil, password = nil, redis_reconnect_attempts = 10) + Redis.new(host: host, port: port, password: password, reconnect_attempts: redis_reconnect_attempts, reconnect_delay: 1.5, + reconnect_delay_max: 10.0) end def self.pools(conf) diff --git a/lib/vmpooler/api.rb b/lib/vmpooler/api.rb index 9ffe386..7d72ec6 100644 --- a/lib/vmpooler/api.rb +++ b/lib/vmpooler/api.rb @@ -38,8 +38,8 @@ module Vmpooler # Using customised collector that filters out hostnames on API paths require 'vmpooler/metrics/promstats/collector_middleware' require 'prometheus/middleware/exporter' - use Vmpooler::Metrics::Promstats::CollectorMiddleware, metrics_prefix: "#{metrics.metrics_prefix}_http" - use Prometheus::Middleware::Exporter, path: metrics.endpoint + use Vmpooler::Metrics::Promstats::CollectorMiddleware, metrics_prefix: "#{metrics.prometheus_prefix}_http" + use Prometheus::Middleware::Exporter, path: metrics.prometheus_endpoint end if torun.include? :api diff --git a/lib/vmpooler/api/v1.rb b/lib/vmpooler/api/v1.rb index 8c150ff..066af86 100644 --- a/lib/vmpooler/api/v1.rb +++ b/lib/vmpooler/api/v1.rb @@ -89,18 +89,16 @@ module Vmpooler template_backends += aliases weighted_pools = get_pool_weights(template_backends) - pickup = Pickup.new(weighted_pools) if weighted_pools.count == template_backends.count - count.to_i.times do - if pickup + if weighted_pools.count > 1 && weighted_pools.count == template_backends.count + pickup = Pickup.new(weighted_pools) + count.to_i.times do selection << pickup.pick - else + end + else + count.to_i.times do selection << template_backends.sample end end - else - count.to_i.times do - selection << template - end end count_selection(selection) @@ -809,7 +807,7 @@ module Vmpooler post "#{api_prefix}/ondemandvm/?" do content_type :json - metrics.increment('api_vm.post.ondemand.requestid') + metrics.increment('http_requests_vm_total.post.ondemand.requestid') need_token! if Vmpooler::API.settings.config[:auth] @@ -847,7 +845,7 @@ module Vmpooler post "#{api_prefix}/ondemandvm/:template/?" do content_type :json result = { 'ok' => false } - metrics.increment('api_vm.delete.ondemand.template') + metrics.increment('http_requests_vm_total.delete.ondemand.template') need_token! if Vmpooler::API.settings.config[:auth] @@ -874,7 +872,7 @@ module Vmpooler get "#{api_prefix}/ondemandvm/:requestid/?" do content_type :json - metrics.increment('api_vm.get.ondemand.request') + metrics.increment('http_requests_vm_total.get.ondemand.request') status 404 result = check_ondemand_request(params[:requestid]) @@ -885,7 +883,7 @@ module Vmpooler delete "#{api_prefix}/ondemandvm/:requestid/?" do content_type :json need_token! if Vmpooler::API.settings.config[:auth] - metrics.increment('api_vm.delete.ondemand.request') + metrics.increment('http_requests_vm_total.delete.ondemand.request') status 404 result = delete_ondemand_request(params[:requestid]) @@ -896,7 +894,7 @@ module Vmpooler post "#{api_prefix}/vm/?" do content_type :json result = { 'ok' => false } - metrics.increment('api_vm.post.vm.checkout') + metrics.increment('http_requests_vm_total.post.vm.checkout') payload = JSON.parse(request.body.read) @@ -1051,7 +1049,7 @@ module Vmpooler post "#{api_prefix}/vm/:template/?" do content_type :json result = { 'ok' => false } - metrics.increment('api_vm.get.vm.template') + metrics.increment('http_requests_vm_total.get.vm.template') payload = extract_templates_from_query_params(params[:template]) @@ -1075,7 +1073,7 @@ module Vmpooler get "#{api_prefix}/vm/:hostname/?" do content_type :json - metrics.increment('api_vm.get.vm.hostname') + metrics.increment('http_requests_vm_total.get.vm.hostname') result = {} @@ -1148,7 +1146,7 @@ module Vmpooler delete "#{api_prefix}/vm/:hostname/?" do content_type :json - metrics.increment('api_vm.delete.vm.hostname') + metrics.increment('http_requests_vm_total.delete.vm.hostname') result = {} @@ -1177,7 +1175,7 @@ module Vmpooler put "#{api_prefix}/vm/:hostname/?" do content_type :json - metrics.increment('api_vm.put.vm.modify') + metrics.increment('http_requests_vm_total.put.vm.modify') status 404 result = { 'ok' => false } @@ -1254,7 +1252,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/disk/:size/?" do content_type :json - metrics.increment('api_vm.post.vm.disksize') + metrics.increment('http_requests_vm_total.post.vm.disksize') need_token! if Vmpooler::API.settings.config[:auth] @@ -1278,7 +1276,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/snapshot/?" do content_type :json - metrics.increment('api_vm.post.vm.snapshot') + metrics.increment('http_requests_vm_total.post.vm.snapshot') need_token! if Vmpooler::API.settings.config[:auth] @@ -1304,7 +1302,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/snapshot/:snapshot/?" do content_type :json - metrics.increment('api_vm.post.vm.disksize') + metrics.increment('http_requests_vm_total.post.vm.disksize') need_token! if Vmpooler::API.settings.config[:auth] diff --git a/lib/vmpooler/metrics/promstats.rb b/lib/vmpooler/metrics/promstats.rb index c7aeb6d..bee432f 100644 --- a/lib/vmpooler/metrics/promstats.rb +++ b/lib/vmpooler/metrics/promstats.rb @@ -5,7 +5,7 @@ require 'prometheus/client' module Vmpooler class Metrics class Promstats < Metrics - attr_reader :prefix, :endpoint, :metrics_prefix + attr_reader :prefix, :prometheus_endpoint, :prometheus_prefix # Constants for Metric Types M_COUNTER = 1 @@ -21,25 +21,139 @@ module Vmpooler REDIS_CONNECT_BUCKETS = [1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 18.0, 23.0].freeze @p_metrics = {} + @torun = [] def initialize(logger, params = {}) @prefix = params['prefix'] || 'vmpooler' - @metrics_prefix = params['metrics_prefix'] || 'vmpooler' - @endpoint = params['endpoint'] || '/prometheus' + @prometheus_prefix = params['prometheus_prefix'] || 'vmpooler' + @prometheus_endpoint = params['prometheus_endpoint'] || '/prometheus' @logger = logger # Setup up prometheus registry and data structures @prometheus = Prometheus::Client.registry end - # Metrics structure used to register the metrics and also translate/interpret the incoming metrics. +=begin # rubocop:disable Style/BlockComments + The Metrics table is used to register metrics and translate/interpret the incoming metrics. + + This table describes all of the prometheus metrics that are recognised by the application. + The background documentation for defining metrics is at: https://prometheus.io/docs/introduction/ + In particular, the naming practices should be adhered to: https://prometheus.io/docs/practices/naming/ + The Ruby Client docs are also useful: https://github.com/prometheus/client_ruby + + The table here allows the currently used stats definitions to be translated correctly for Prometheus. + The current format is of the form A.B.C, where the final fields may be actual values (e.g. poolname). + Prometheus metrics cannot use the '.' as a character, so this is either translated into '_' or + variable parameters are expressed as labels accompanying the metric. + + Sample statistics are: + # Example showing hostnames (FQDN) + migrate_from.pix-jj26-chassis1-2.ops.puppetlabs.net + migrate_to.pix-jj26-chassis1-8.ops.puppetlabs.net + + # Example showing poolname as a parameter + poolreset.invalid.centos-8-x86_64 + + # Examples showing similar sub-typed checkout stats + checkout.empty.centos-8-x86_64 + checkout.invalid.centos-8-x86_64 + checkout.invalid.unknown + checkout.success.centos-8-x86_64 + + # Stats without any final parameter. + connect.fail + connect.open + delete.failed + delete.success + + # Stats with multiple param_labels + vmpooler_user.debian-8-x86_64-pixa4.john + + The metrics implementation here preserves the existing framework which will continue to support + graphite and statsd (since vmpooler is used outside of puppet). Some rationalisation and renaming + of the actual metrics was done to get a more usable model to fit within the prometheus framework. + This particularly applies to the user stats collected once individual machines are terminated as + this would have challenged prometheus' ability due to multiple (8) parameters being collected + in a single measure (which has a very high cardinality). + + Prometheus requires all metrics to be pre-registered (which is the primary reason for this + table) and also uses labels to differentiate the characteristics of the measurement. This + is used throughout to capture information such as poolnames. So for example, this is a sample + of the prometheus metrics generated for the "vmpooler_ready" measurement: + + # TYPE vmpooler_ready gauge + # HELP vmpooler_ready vmpooler number of machines in ready State + vmpooler_ready{vmpooler_instance="vmpooler",poolname="win-10-ent-x86_64-pixa4"} 2.0 + vmpooler_ready{vmpooler_instance="vmpooler",poolname="debian-8-x86_64-pixa4"} 2.0 + vmpooler_ready{vmpooler_instance="vmpooler",poolname="centos-8-x86_64-pixa4"} 2.0 + + Prometheus supports the following metric types: + (see https://prometheus.io/docs/concepts/metric_types/) + + Counter (increment): + A counter is a cumulative metric that represents a single monotonically increasing counter whose + value can only increase or be reset to zero on restart + + Gauge: + A gauge is a metric that represents a single numerical value that can arbitrarily go up and down. + + Histogram: + A histogram samples observations (usually things like request durations or response sizes) and + counts them in configurable buckets. It also provides a sum of all observed values. + This replaces the timer metric supported by statsd + + Summary : + Summary provides a total count of observations and a sum of all observed values, it calculates + configurable quantiles over a sliding time window. + (Summary is not used in vmpooler) + + vmpooler_metrics_table is a table of hashes, where the hash key represents the first part of the + metric name, e.g. for the metric 'delete.*' (see above) the key would be 'delete:'. "Sub-metrics", + are supported, again for the 'delete.*' example, this can be subbed into '.failed' and '.success' + + The entries within the hash as are follows: + + mtype: + Metric type, which is one of the following constants: + M_COUNTER = 1 + M_GAUGE = 2 + M_SUMMARY = 3 + M_HISTOGRAM = 4 + + torun: + Indicates which process the metric is for - within vmpooler this is either ':api' or ':manager' + (there is a suggestion that we change this to two separate tables). + + docstring: + Documentation string for the metric - this is displayed as HELP text by the endpoint. + + metric_suffixes: + Array of sub-metrics of the form 'sub-metric: "doc-string for sub-metric"'. This supports + the generation of individual sub-metrics for all elements in the array. + + param_labels: + This is an optional array of symbols for the final labels in a metric. It should not be + specified if there are no additional parameters. + + If it specified, it can either be a single symbol, or two or more symbols. The treatment + differs if there is only one symbol given as all of the remainder of the metric string + supplied is collected into a label with the symbol name. This allows the handling of + node names (FQDN). + + To illustrate: + 1. In the 'connect.*' or 'delete.*' example above, it should not be specified. + 2. For the 'migrate_from.*' example above, the remainder of the measure is collected + as the 'host_name' label. + 3. For the 'vmpooler_user' example above, the first parameter is treated as the pool + name, and the second as the username. + +=end def vmpooler_metrics_table { errors: { mtype: M_COUNTER, torun: %i[manager], docstring: 'Count of errors for pool', - prom_metric_prefix: "#{@metrics_prefix}_errors", metric_suffixes: { markedasfailed: 'timeout waiting for instance to initialise', duplicatehostname: 'unable to create instance due to duplicate hostname', @@ -51,42 +165,36 @@ module Vmpooler mtype: M_COUNTER, torun: %i[manager], docstring: 'Number of pool instances this user created created', - prom_metric_prefix: "#{@metrics_prefix}_user", param_labels: %i[user poolname] }, usage_litmus: { mtype: M_COUNTER, torun: %i[manager], docstring: 'Pools by Litmus job usage', - prom_metric_prefix: "#{@metrics_prefix}_usage_litmus", param_labels: %i[user poolname] }, usage_jenkins_instance: { mtype: M_COUNTER, torun: %i[manager], docstring: 'Pools by Jenkins instance usage', - prom_metric_prefix: "#{@metrics_prefix}_usage_jenkins_instance", param_labels: %i[jenkins_instance value_stream poolname] }, usage_branch_project: { mtype: M_COUNTER, torun: %i[manager], docstring: 'Pools by branch/project usage', - prom_metric_prefix: "#{@metrics_prefix}_usage_branch_project", param_labels: %i[branch project poolname] }, usage_job_component: { mtype: M_COUNTER, torun: %i[manager], docstring: 'Pools by job/component usage', - prom_metric_prefix: "#{@metrics_prefix}_usage_job_component", param_labels: %i[job_name component_to_test poolname] }, checkout: { mtype: M_COUNTER, torun: %i[api], docstring: 'Pool checkout counts', - prom_metric_prefix: "#{@metrics_prefix}_checkout", metric_suffixes: { nonresponsive: 'checkout failed - non responsive machine', empty: 'checkout failed - no machine', @@ -99,7 +207,6 @@ module Vmpooler mtype: M_COUNTER, torun: %i[api], docstring: 'Delete machine', - prom_metric_prefix: "#{@metrics_prefix}_delete", metric_suffixes: { success: 'succeeded', failed: 'failed' @@ -110,7 +217,6 @@ module Vmpooler mtype: M_COUNTER, torun: %i[api], docstring: 'Ondemand request', - prom_metric_prefix: "#{@metrics_prefix}_ondemandrequest_generate", metric_suffixes: { duplicaterequests: 'failed duplicate request', success: 'succeeded' @@ -121,7 +227,6 @@ module Vmpooler mtype: M_COUNTER, torun: %i[api], docstring: 'Ondemand request failure', - prom_metric_prefix: "#{@metrics_prefix}_ondemandrequest_fail", metric_suffixes: { toomanyrequests: 'too many requests', invalid: 'invalid poolname' @@ -132,7 +237,6 @@ module Vmpooler mtype: M_COUNTER, torun: %i[api], docstring: 'vmpooler pool configuration request', - prom_metric_prefix: "#{@metrics_prefix}_config", metric_suffixes: { invalid: 'Invalid' }, param_labels: %i[poolname] }, @@ -140,7 +244,6 @@ module Vmpooler mtype: M_COUNTER, torun: %i[api], docstring: 'Pool reset counter', - prom_metric_prefix: "#{@metrics_prefix}_poolreset", metric_suffixes: { invalid: 'Invalid Pool' }, param_labels: %i[poolname] }, @@ -148,7 +251,6 @@ module Vmpooler mtype: M_COUNTER, torun: %i[manager], docstring: 'vmpooler connect (to vSphere)', - prom_metric_prefix: "#{@metrics_prefix}_connect", metric_suffixes: { open: 'Connect Succeeded', fail: 'Connect Failed' @@ -159,42 +261,36 @@ module Vmpooler mtype: M_COUNTER, torun: %i[manager], docstring: 'vmpooler machine migrated from', - prom_metric_prefix: "#{@metrics_prefix}_migrate_from", param_labels: %i[host_name] }, migrate_to: { mtype: M_COUNTER, torun: %i[manager], docstring: 'vmpooler machine migrated to', - prom_metric_prefix: "#{@metrics_prefix}_migrate_to", param_labels: %i[host_name] }, - api_vm: { + http_requests_vm_total: { mtype: M_COUNTER, torun: %i[api], docstring: 'Total number of HTTP request/sub-operations handled by the Rack application under the /vm endpoint', - prom_metric_prefix: "#{@metrics_prefix}_http_requests_vm_total", param_labels: %i[method subpath operation] }, ready: { mtype: M_GAUGE, torun: %i[manager], docstring: 'vmpooler number of machines in ready State', - prom_metric_prefix: "#{@metrics_prefix}_ready", param_labels: %i[poolname] }, running: { mtype: M_GAUGE, torun: %i[manager], docstring: 'vmpooler number of machines running', - prom_metric_prefix: "#{@metrics_prefix}_running", param_labels: %i[poolname] }, connection_available: { mtype: M_GAUGE, torun: %i[manager], docstring: 'vmpooler redis connections available', - prom_metric_prefix: "#{@metrics_prefix}_connection_available", param_labels: %i[type provider] }, time_to_ready_state: { @@ -202,7 +298,6 @@ module Vmpooler torun: %i[manager], buckets: POOLER_READY_TIME_BUCKETS, docstring: 'Time taken for machine to read ready state for pool', - prom_metric_prefix: "#{@metrics_prefix}_time_to_ready_state", param_labels: %i[poolname] }, migrate: { @@ -210,7 +305,6 @@ module Vmpooler torun: %i[manager], buckets: POOLER_CLONE_TIME_BUCKETS, docstring: 'vmpooler time taken to migrate machine for pool', - prom_metric_prefix: "#{@metrics_prefix}_migrate", param_labels: %i[poolname] }, clone: { @@ -218,7 +312,6 @@ module Vmpooler torun: %i[manager], buckets: POOLER_CLONE_TIME_BUCKETS, docstring: 'vmpooler time taken to clone machine', - prom_metric_prefix: "#{@metrics_prefix}_clone", param_labels: %i[poolname] }, destroy: { @@ -226,7 +319,6 @@ module Vmpooler torun: %i[manager], buckets: POOLER_CLONE_TIME_BUCKETS, docstring: 'vmpooler time taken to destroy machine', - prom_metric_prefix: "#{@metrics_prefix}_destroy", param_labels: %i[poolname] }, connection_waited: { @@ -234,7 +326,6 @@ module Vmpooler torun: %i[manager], buckets: REDIS_CONNECT_BUCKETS, docstring: 'vmpooler redis connection wait time', - prom_metric_prefix: "#{@metrics_prefix}_connection_waited", param_labels: %i[type provider] } } @@ -278,8 +369,9 @@ module Vmpooler # Top level method to register all the prometheus metrics. def setup_prometheus_metrics(torun) + @torun = torun @p_metrics = vmpooler_metrics_table - @p_metrics.each do |_name, metric_spec| + @p_metrics.each do |name, metric_spec| # Only register metrics appropriate to api or manager next if (torun & metric_spec[:torun]).empty? @@ -288,7 +380,7 @@ module Vmpooler metric_spec[:metric_suffixes].each do |metric_suffix| add_prometheus_metric( metric_spec, - "#{metric_spec[:prom_metric_prefix]}_#{metric_suffix[0]}", + "#{@prometheus_prefix}_#{name}_#{metric_suffix[0]}", "#{metric_spec[:docstring]} #{metric_suffix[1]}" ) end @@ -296,7 +388,7 @@ module Vmpooler # No Additional counter suffixes so register this as metric. add_prometheus_metric( metric_spec, - metric_spec[:prom_metric_prefix], + "#{@prometheus_prefix}_#{name}", metric_spec[:docstring] ) end @@ -309,15 +401,18 @@ module Vmpooler metric_key = sublabels.shift.to_sym raise("Invalid Metric #{metric_key} for #{label}") unless @p_metrics.key? metric_key - metric = @p_metrics[metric_key].clone + metric_spec = @p_metrics[metric_key] + raise("Invalid Component #{component} for #{metric_key}") if (metric_spec[:torun] & @torun).nil? + + metric = metric_spec.clone if metric.key? :metric_suffixes metric_subkey = sublabels.shift.to_sym raise("Invalid Metric #{metric_key}_#{metric_subkey} for #{label}") unless metric[:metric_suffixes].key? metric_subkey.to_sym - metric[:metric_name] = "#{metric[:prom_metric_prefix]}_#{metric_subkey}" + metric[:metric_name] = "#{@prometheus_prefix}_#{metric_key}_#{metric_subkey}" else - metric[:metric_name] = metric[:prom_metric_prefix] + metric[:metric_name] = "#{@prometheus_prefix}_#{metric_key}" end # Check if we are looking for a parameter value at last element. diff --git a/lib/vmpooler/metrics/promstats/collector_middleware.rb b/lib/vmpooler/metrics/promstats/collector_middleware.rb index bfaa7a0..540b1e6 100644 --- a/lib/vmpooler/metrics/promstats/collector_middleware.rb +++ b/lib/vmpooler/metrics/promstats/collector_middleware.rb @@ -114,6 +114,9 @@ module Vmpooler path .gsub(%r{/vm/.+$}, '/vm') .gsub(%r{/ondemand/.+$}, '/ondemand') + .gsub(%r{/token/.+$}, '/token') + .gsub(%r{/lib/.+$}, '/lib') + .gsub(%r{/img/.+$}, '/img') end end end diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 0d56fbc..95927d4 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -150,8 +150,11 @@ module Vmpooler redis.pipelined do redis.hset("vmpooler__active__#{pool}", vm, Time.now) redis.hset("vmpooler__vm__#{vm}", 'checkout', Time.now) - redis.hset("vmpooler__vm__#{vm}", 'token:token', ondemandrequest_hash['token:token']) if ondemandrequest_hash['token:token'] - redis.hset("vmpooler__vm__#{vm}", 'token:user', ondemandrequest_hash['token:user']) if ondemandrequest_hash['token:user'] + if ondemandrequest_hash['token:token'] + redis.hset("vmpooler__vm__#{vm}", 'token:token', ondemandrequest_hash['token:token']) + redis.hset("vmpooler__vm__#{vm}", 'token:user', ondemandrequest_hash['token:user']) + redis.hset("vmpooler__vm__#{vm}", 'lifetime', $config[:config]['vm_lifetime_auth'].to_i) + end redis.sadd("vmpooler__#{request_id}__#{pool_alias}__#{pool}", vm) end move_vm_queue(pool, vm, 'pending', 'running', redis) @@ -365,7 +368,7 @@ module Vmpooler $metrics.increment("errors.duplicatehostname.#{pool_name}") $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})") elsif !dns_available - $metrics.increment("errors.staledns.#{hostname}") + $metrics.increment("errors.staledns.#{pool_name}") $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} already exists in DNS records (#{dns_ip}), stale DNS") end end @@ -536,15 +539,14 @@ module Vmpooler def purge_unused_vms_and_folders global_purge = $config[:config]['purge_unconfigured_folders'] providers = $config[:providers].keys - providers.each do |provider| - provider_purge = $config[:providers][provider]['purge_unconfigured_folders'] - provider_purge = global_purge if provider_purge.nil? + providers.each do |provider_key| + provider_purge = $config[:providers][provider_key]['purge_unconfigured_folders'] || global_purge if provider_purge Thread.new do begin - purge_vms_and_folders($providers[provider.to_s]) + purge_vms_and_folders(provider_key) rescue StandardError => e - $logger.log('s', "[!] failed while purging provider #{provider} VMs and folders with an error: #{e}") + $logger.log('s', "[!] failed while purging provider #{provider_key} VMs and folders with an error: #{e}") end end end @@ -553,14 +555,13 @@ module Vmpooler end # Return a list of pool folders - def pool_folders(provider) - provider_name = provider.name + def pool_folders(provider_name) folders = {} $config[:pools].each do |pool| - next unless pool['provider'] == provider_name + next unless pool['provider'] == provider_name.to_s folder_parts = pool['folder'].split('/') - datacenter = provider.get_target_datacenter_from_config(pool['name']) + datacenter = $providers[provider_name.to_s].get_target_datacenter_from_config(pool['name']) folders[folder_parts.pop] = "#{datacenter}/vm/#{folder_parts.join('/')}" end folders @@ -574,8 +575,9 @@ module Vmpooler base.uniq end - def purge_vms_and_folders(provider) - configured_folders = pool_folders(provider) + def purge_vms_and_folders(provider_name) + provider = $providers[provider_name.to_s] + configured_folders = pool_folders(provider_name) base_folders = get_base_folders(configured_folders) whitelist = provider.provider_config['folder_whitelist'] provider.purge_unconfigured_folders(base_folders, configured_folders, whitelist) diff --git a/lib/vmpooler/version.rb b/lib/vmpooler/version.rb index 2aced2c..7d40410 100644 --- a/lib/vmpooler/version.rb +++ b/lib/vmpooler/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Vmpooler - VERSION = '0.14.1' + VERSION = '0.14.5' end diff --git a/spec/integration/api/v1/ondemandvm_spec.rb b/spec/integration/api/v1/ondemandvm_spec.rb index 37b315c..a361f9f 100644 --- a/spec/integration/api/v1/ondemandvm_spec.rb +++ b/spec/integration/api/v1/ondemandvm_spec.rb @@ -24,15 +24,19 @@ describe Vmpooler::API::V1 do 'vm_lifetime_auth' => 2, 'max_ondemand_instances_per_request' => 50, 'backend_weight' => { - 'compute1' => 5 + 'compute1' => 5, + 'compute2' => 0 } }, pools: [ - {'name' => 'pool1', 'size' => 0}, - {'name' => 'pool2', 'size' => 0, 'clone_target' => 'compute1'}, + {'name' => 'pool1', 'size' => 0, 'clone_target' => 'compute1'}, + {'name' => 'pool2', 'size' => 0, 'clone_target' => 'compute2'}, {'name' => 'pool3', 'size' => 0, 'clone_target' => 'compute1'} ], - alias: { 'poolone' => ['pool1'] }, + alias: { + 'poolone' => ['pool1'], + 'pool2' => ['pool1'] + }, pool_names: [ 'pool1', 'pool2', 'pool3', 'poolone' ] } } @@ -92,6 +96,22 @@ describe Vmpooler::API::V1 do post "#{prefix}/ondemandvm", '{"poolone":"1"}' end + context 'with a backend of 0 weight' do + before(:each) do + config[:config]['backend_weight']['compute1'] = 0 + end + + it 'sets the platform string in redis for the request to indicate the selected platforms' do + expect(redis).to receive(:hset).with("vmpooler__odrequest__#{uuid}", 'requested', 'pool1:pool1:1') + post "#{prefix}/ondemandvm", '{"pool1":"1"}' + end + end + + it 'sets the platform string in redis for the request to indicate the selected platforms using weight' do + expect(redis).to receive(:hset).with("vmpooler__odrequest__#{uuid}", 'requested', 'pool2:pool1:1') + post "#{prefix}/ondemandvm", '{"pool2":"1"}' + end + context 'with domain set in the config' do let(:domain) { 'example.com' } before(:each) do diff --git a/spec/unit/collector_middleware_spec.rb b/spec/unit/collector_middleware_spec.rb index 671e118..5244c25 100644 --- a/spec/unit/collector_middleware_spec.rb +++ b/spec/unit/collector_middleware_spec.rb @@ -55,7 +55,7 @@ describe Vmpooler::Metrics::Promstats::CollectorMiddleware do expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.25" => 1) end - it 'normalizes paths cotaining /vm by default' do + it 'normalizes paths containing /vm by default' do expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) get '/foo/vm/bar-mumble-flame' @@ -83,6 +83,62 @@ describe Vmpooler::Metrics::Promstats::CollectorMiddleware do expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) end + it 'normalizes paths containing /token by default' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/token/secret-token-name' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/token', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/token' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + it 'normalizes paths containing /api/v1/token by default' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/api/v1/token/secret-token-name' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/api/v1/token', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/api/v1/token' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + it 'normalizes paths containing /img by default' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/img/image-name' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/img', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/img' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + it 'normalizes paths containing /lib by default' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/lib/xxxxx.js' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/lib', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/lib' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + context 'when the app raises an exception' do let(:original_app) do lambda do |env| diff --git a/spec/unit/generic_connection_pool_spec.rb b/spec/unit/generic_connection_pool_spec.rb index 5c24215..4777795 100644 --- a/spec/unit/generic_connection_pool_spec.rb +++ b/spec/unit/generic_connection_pool_spec.rb @@ -25,6 +25,13 @@ describe 'GenericConnectionPool' do connection: 'connection' }} + it 'should error out when connection pool could not establish no nothing' do + newsub = Vmpooler.redis_connection_pool("foo,bar", "1234", "fuba", 1, 1, metrics, 0) + expect { newsub.with_metrics do |conn_pool_object| + conn_pool_object.srem('foo', "bar") + end }.to raise_error Redis::CannotConnectError + end + it 'should return a connection object when grabbing one from the pool' do subject.with_metrics do |conn_pool_object| expect(conn_pool_object).to be(connection_object) diff --git a/spec/unit/pool_manager_spec.rb b/spec/unit/pool_manager_spec.rb index 446d1ba..e8a17a1 100644 --- a/spec/unit/pool_manager_spec.rb +++ b/spec/unit/pool_manager_spec.rb @@ -913,7 +913,7 @@ EOT resolv = class_double("Resolv").as_stubbed_const(:transfer_nested_constants => true) expect(subject).to receive(:generate_and_check_hostname).exactly(3).times.and_return([vm_name, true]) #skip this, make it available all times expect(resolv).to receive(:getaddress).exactly(3).times.and_return("1.2.3.4") - expect(metrics).to receive(:increment).with("errors.staledns.#{vm_name}").exactly(3).times + expect(metrics).to receive(:increment).with("errors.staledns.#{pool}").exactly(3).times expect{subject._clone_vm(pool,provider)}.to raise_error(/Unable to generate a unique hostname after/) end it 'should be successful if DNS does not exist' do @@ -1418,16 +1418,24 @@ EOT ) } - it 'should return a list of pool folders' do - expect(provider).to receive(:get_target_datacenter_from_config).with(pool).and_return(datacenter) + context 'when evaluating pool folders' do + before do + expect(subject).not_to be_nil + # Inject mock provider into global variable - Note this is a code smell + $providers = { provider_name => provider } + end - expect(subject.pool_folders(provider)).to eq(expected_response) - end + it 'should return a list of pool folders' do + expect(provider).to receive(:get_target_datacenter_from_config).with(pool).and_return(datacenter) - it 'should raise an error when the provider fails to get the datacenter' do - expect(provider).to receive(:get_target_datacenter_from_config).with(pool).and_raise('mockerror') + expect(subject.pool_folders(provider_name)).to eq(expected_response) + end - expect{ subject.pool_folders(provider) }.to raise_error(RuntimeError, 'mockerror') + it 'should raise an error when the provider fails to get the datacenter' do + expect(provider).to receive(:get_target_datacenter_from_config).with(pool).and_raise('mockerror') + + expect{ subject.pool_folders(provider_name) }.to raise_error(RuntimeError, 'mockerror') + end end end @@ -1456,20 +1464,28 @@ EOT ) } - it 'should run purge_unconfigured_folders' do - expect(subject).to receive(:pool_folders).and_return(configured_folders) - expect(provider).to receive(:purge_unconfigured_folders).with(base_folders, configured_folders, whitelist) - expect(provider).to receive(:provider_config).and_return({}) + context 'when purging folders' do + before do + expect(subject).not_to be_nil + # Inject mock provider into global variable - Note this is a code smell + $providers = { provider_name => provider } + end - subject.purge_vms_and_folders(provider) - end + it 'should run purge_unconfigured_folders' do + expect(subject).to receive(:pool_folders).and_return(configured_folders) + expect(provider).to receive(:purge_unconfigured_folders).with(base_folders, configured_folders, whitelist) + expect(provider).to receive(:provider_config).and_return({}) - it 'should raise any errors' do - expect(subject).to receive(:pool_folders).and_return(configured_folders) - expect(provider).to receive(:purge_unconfigured_folders).with(base_folders, configured_folders, whitelist).and_raise('mockerror') - expect(provider).to receive(:provider_config).and_return({}) + subject.purge_vms_and_folders(provider_name) + end - expect{ subject.purge_vms_and_folders(provider) }.to raise_error(RuntimeError, 'mockerror') + it 'should raise any errors' do + expect(subject).to receive(:pool_folders).and_return(configured_folders) + expect(provider).to receive(:purge_unconfigured_folders).with(base_folders, configured_folders, whitelist).and_raise('mockerror') + expect(provider).to receive(:provider_config).and_return({}) + + expect{ subject.purge_vms_and_folders(provider_name) }.to raise_error(RuntimeError, 'mockerror') + end end end diff --git a/spec/unit/promstats_spec.rb b/spec/unit/promstats_spec.rb index eb66c5f..dca2e12 100644 --- a/spec/unit/promstats_spec.rb +++ b/spec/unit/promstats_spec.rb @@ -4,7 +4,7 @@ require 'spec_helper' describe 'prometheus' do logger = MockLogger.new - params = { 'prefix': 'test', 'metrics_prefix': 'mtest', 'endpoint': 'eptest' } + params = { 'prefix' => 'test', 'prometheus_prefix' => 'mtest', 'prometheus_endpoint' => 'eptest' } subject = Vmpooler::Metrics::Promstats.new(logger, params) let(:logger) { MockLogger.new } @@ -21,10 +21,13 @@ describe 'prometheus' do param_labels: %i[first second last] } end let!(:labels_hash) { { labels: { :first => nil, :second => nil, :last => nil } } } - before { subject.instance_variable_set(:@p_metrics, { foo: foo_metrics }) } + before { + subject.instance_variable_set(:@p_metrics, { foo: foo_metrics, torun: %i[api] }) + subject.instance_variable_set(:@torun, [ :api ]) + } it 'returns the metric for a given label including parsed labels' do - expect(subject.find_metric('foo.bar')).to include(metric_name: '_bar') + expect(subject.find_metric('foo.bar')).to include(metric_name: 'mtest_foo_bar') expect(subject.find_metric('foo.bar')).to include(foo_metrics) expect(subject.find_metric('foo.bar')).to include(labels_hash) end @@ -41,10 +44,15 @@ describe 'prometheus' do context "Node Name Handling" do let!(:node_metrics) do { metric_name: 'connection_to', - param_labels: %i[node] } + param_labels: %i[node], + torun: %i[api] + } end let!(:nodename_hash) { { labels: { :node => 'test.bar.net'}}} - before { subject.instance_variable_set(:@p_metrics, { connection_to: node_metrics }) } + before { + subject.instance_variable_set(:@p_metrics, { connection_to: node_metrics }) + subject.instance_variable_set(:@torun, [ :api ]) + } it 'Return final remaining fields (e.g. fqdn) in last label' do expect(subject.find_metric('connection_to.test.bar.net')).to include(nodename_hash) @@ -57,7 +65,6 @@ describe 'prometheus' do Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new subject.setup_prometheus_metrics(%i[api manager]) end - let(:MCOUNTER) { 1 } describe '#setup_prometheus_metrics' do it 'calls add_prometheus_metric for each item in list' do @@ -260,12 +267,12 @@ describe 'prometheus' do po.get(labels: metric[:labels]) }.by(1) end - it 'Increments label api_vm.#{method}.#{subpath}.#{operation}' do + it 'Increments label http_requests_vm_total.#{method}.#{subpath}.#{operation}' do method = 'get' subpath = 'template' operation = 'something' - expect { subject.increment("api_vm.#{method}.#{subpath}.#{operation}") }.to change { - metric, po = subject.get("api_vm.#{method}.#{subpath}.#{operation}") + expect { subject.increment("http_requests_vm_total.#{method}.#{subpath}.#{operation}") }.to change { + metric, po = subject.get("http_requests_vm_total.#{method}.#{subpath}.#{operation}") po.get(labels: metric[:labels]) }.by(1) end diff --git a/vmpooler.yaml.example b/vmpooler.yaml.example index 6120c50..6194167 100644 --- a/vmpooler.yaml.example +++ b/vmpooler.yaml.example @@ -204,6 +204,12 @@ # - redis_connection_pool_timeout # How long a task should wait (in seconds) for a redis connection when all connections are in use. # (default: 5) +# +# - reconnect_attempts +# How many times to retry one redis connection, for example if the host:port is not available +# The time between attempts starts at 1.5s and increases up to 10s, in such a way that 10 attempts +# takes about 80s, at which point an error is returned. +# (default: 10) # Example: @@ -240,7 +246,7 @@ # # This section contains the connection information required to store # historical data via statsd. This is mutually exclusive with graphite -# and takes precedence. +# and prometheus and takes precedence. # # Available configuration parameters: # @@ -267,7 +273,7 @@ # # This section contains the connection information required to store # historical data in an external Graphite database. This is mutually exclusive -# with statsd. +# with statsd and prometheus - i.e. only one can be selected. # # Available configuration parameters: # @@ -288,6 +294,34 @@ :graphite: server: 'graphite.example.com' +# :prometheus +# +# This section contains the connection information required to store +# historical data in an external Graphite database. This is mutually exclusive +# with statsd and graphite - i.e. only one can be selected. +# +# Available configuration parameters: +# +# - prefix +# The prefix for this vmpooler instance. +# (optional; default: 'vmpooler') +# +# - prometheus_prefix +# The prefix to use while storing prometheus data. +# (optional; default: 'vmpooler') +# +# - prometheus_endpoint +# The metrics endpoint on the vmpooler server +# (optional; default: '/prometheus') + +# Example: + +:prometheus: + prefix: 'staging' + prometheus_prefix: 'vmpooler' + prometheus_endpoint: '/prometheus' + + # :auth: # # This section contains information related to authenticating users