diff --git a/lib/vmpooler.rb b/lib/vmpooler.rb index 98b4bd6..20eb26b 100644 --- a/lib/vmpooler.rb +++ b/lib/vmpooler.rb @@ -15,9 +15,6 @@ module Vmpooler require 'timeout' require 'yaml' - require 'prometheus/middleware/collector' - require 'prometheus/middleware/exporter' - %w[api metrics logger pool_manager generic_connection_pool].each do |lib| require "vmpooler/#{lib}" end diff --git a/lib/vmpooler/api.rb b/lib/vmpooler/api.rb index 068f024..cf516a7 100644 --- a/lib/vmpooler/api.rb +++ b/lib/vmpooler/api.rb @@ -35,7 +35,10 @@ module Vmpooler # in the config file. metrics.setup_prometheus_metrics - use Prometheus::Middleware::Collector, metrics_prefix: "#{metrics.metrics_prefix}_http" + # Using customised collector that filters out hostnames on API paths + require 'vmpooler/metrics/promstats/collector_middleware' + require 'prometheus/middleware/exporter' + use Vmpooler::Metrics::Promstats::CollectorMiddleware, metrics_prefix: "#{metrics.metrics_prefix}_http" use Prometheus::Middleware::Exporter, path: metrics.endpoint end diff --git a/lib/vmpooler/api/v1.rb b/lib/vmpooler/api/v1.rb index 5ecb2b5..c5cb0bf 100644 --- a/lib/vmpooler/api/v1.rb +++ b/lib/vmpooler/api/v1.rb @@ -809,6 +809,7 @@ module Vmpooler post "#{api_prefix}/ondemandvm/?" do content_type :json + metrics.increment('api_vm.post.ondemand.requestid') need_token! if Vmpooler::API.settings.config[:auth] @@ -846,6 +847,7 @@ module Vmpooler post "#{api_prefix}/ondemandvm/:template/?" do content_type :json result = { 'ok' => false } + metrics.increment('api_vm.delete.ondemand.template') need_token! if Vmpooler::API.settings.config[:auth] @@ -872,6 +874,7 @@ module Vmpooler get "#{api_prefix}/ondemandvm/:requestid/?" do content_type :json + metrics.increment('api_vm.get.ondemand.request') status 404 result = check_ondemand_request(params[:requestid]) @@ -882,6 +885,7 @@ module Vmpooler delete "#{api_prefix}/ondemandvm/:requestid/?" do content_type :json need_token! if Vmpooler::API.settings.config[:auth] + metrics.increment('api_vm.delete.ondemand.request') status 404 result = delete_ondemand_request(params[:requestid]) @@ -892,6 +896,7 @@ module Vmpooler post "#{api_prefix}/vm/?" do content_type :json result = { 'ok' => false } + metrics.increment('api_vm.post.vm.checkout') payload = JSON.parse(request.body.read) @@ -1034,6 +1039,7 @@ module Vmpooler post "#{api_prefix}/vm/:template/?" do content_type :json result = { 'ok' => false } + metrics.increment('api_vm.get.vm.template') payload = extract_templates_from_query_params(params[:template]) @@ -1057,6 +1063,7 @@ module Vmpooler get "#{api_prefix}/vm/:hostname/?" do content_type :json + metrics.increment('api_vm.get.vm.hostname') result = {} @@ -1129,6 +1136,7 @@ module Vmpooler delete "#{api_prefix}/vm/:hostname/?" do content_type :json + metrics.increment('api_vm.delete.vm.hostname') result = {} @@ -1156,6 +1164,7 @@ module Vmpooler put "#{api_prefix}/vm/:hostname/?" do content_type :json + metrics.increment('api_vm.put.vm.modify') status 404 result = { 'ok' => false } @@ -1232,6 +1241,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/disk/:size/?" do content_type :json + metrics.increment('api_vm.post.vm.disksize') need_token! if Vmpooler::API.settings.config[:auth] @@ -1255,6 +1265,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/snapshot/?" do content_type :json + metrics.increment('api_vm.post.vm.snapshot') need_token! if Vmpooler::API.settings.config[:auth] @@ -1280,6 +1291,7 @@ module Vmpooler post "#{api_prefix}/vm/:hostname/snapshot/:snapshot/?" do content_type :json + metrics.increment('api_vm.post.vm.disksize') need_token! if Vmpooler::API.settings.config[:auth] diff --git a/lib/vmpooler/metrics/promstats/collector_middleware.rb b/lib/vmpooler/metrics/promstats/collector_middleware.rb new file mode 100644 index 0000000..c05c866 --- /dev/null +++ b/lib/vmpooler/metrics/promstats/collector_middleware.rb @@ -0,0 +1,119 @@ +# frozen_string_literal: true + +# This is an adapted Collector module for vmpooler based on the sample implementation +# available in the prometheus client_ruby library +# https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/middleware/collector.rb +# +# The code was also failing Rubocop on PR check, so have addressed all the offenses. +# +# The method strip_ids_from_path has been adapted to add a match for hostnames in paths +# to replace with a single ":hostname" string to avoid proliferation of stat lines for +# each new vm hostname deleted, modified or otherwise queried. + +require 'benchmark' +require 'prometheus/client' +require 'vmpooler/logger' + +module Vmpooler + class Metrics + class Promstats + # CollectorMiddleware is an implementation of Rack Middleware customised + # for vmpooler use. + # + # By default metrics are registered on the global registry. Set the + # `:registry` option to use a custom registry. + # + # By default metrics all have the prefix "http_server". Set to something + # else if you like. + # + # The request counter metric is broken down by code, method and path by + # default. Set the `:counter_label_builder` option to use a custom label + # builder. + # + # The request duration metric is broken down by method and path by default. + # Set the `:duration_label_builder` option to use a custom label builder. + # + # Label Builder functions will receive a Rack env and a status code, and must + # return a hash with the labels for that request. They must also accept an empty + # env, and return a hash with the correct keys. This is necessary to initialize + # the metrics with the correct set of labels. + class CollectorMiddleware + attr_reader :app, :registry + + def initialize(app, options = {}) + @app = app + @registry = options[:registry] || Prometheus::Client.registry + @metrics_prefix = options[:metrics_prefix] || 'http_server' + + init_request_metrics + init_exception_metrics + end + + def call(env) # :nodoc: + trace(env) { @app.call(env) } + end + + protected + + def init_request_metrics + @requests = @registry.counter( + :"#{@metrics_prefix}_requests_total", + docstring: + 'The total number of HTTP requests handled by the Rack application.', + labels: %i[code method path] + ) + @durations = @registry.histogram( + :"#{@metrics_prefix}_request_duration_seconds", + docstring: 'The HTTP response duration of the Rack application.', + labels: %i[method path] + ) + end + + def init_exception_metrics + @exceptions = @registry.counter( + :"#{@metrics_prefix}_exceptions_total", + docstring: 'The total number of exceptions raised by the Rack application.', + labels: [:exception] + ) + end + + def trace(env) + response = nil + duration = Benchmark.realtime { response = yield } + record(env, response.first.to_s, duration) + response + rescue StandardError => e + @exceptions.increment(labels: { exception: e.class.name }) + raise + end + + def record(env, code, duration) + counter_labels = { + code: code, + method: env['REQUEST_METHOD'].downcase, + path: strip_ids_from_path(env['PATH_INFO']) + } + + duration_labels = { + method: env['REQUEST_METHOD'].downcase, + path: strip_ids_from_path(env['PATH_INFO']) + } + + @requests.increment(labels: counter_labels) + @durations.observe(duration, labels: duration_labels) + rescue # rubocop:disable Style/RescueStandardError + nil + end + + def strip_ids_from_path(path) + # Custom for /vm path - so we just collect aggrate stats for all usage along this one + # path. Custom counters are then added more specific endpoints in v1.rb + # Since we aren't parsing UID/GIDs as in the original example, these are removed. + path + .gsub(%r{/vm/.+$}, '/vm') + .gsub(%r{/ondemand/.+$}, '/ondemand') + end + end + end + end +end diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index cdd6f63..0d56fbc 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -496,7 +496,12 @@ module Vmpooler return unless jenkins_build_url - # TBD - Add Filter for Litmus here as well - to ignore for the moment. + if jenkins_build_url.include? 'litmus' + # Very simple filter for Litmus jobs - just count them coming through for the moment. + $metrics.increment("usage_litmus.#{user}.#{poolname}") + return + end + url_parts = jenkins_build_url.split('/')[2..-1] jenkins_instance = url_parts[0].gsub('.', '_') value_stream_parts = url_parts[2].split('_') diff --git a/spec/unit/collector_middleware_spec.rb b/spec/unit/collector_middleware_spec.rb new file mode 100644 index 0000000..671e118 --- /dev/null +++ b/spec/unit/collector_middleware_spec.rb @@ -0,0 +1,135 @@ + +require 'rack/test' +require 'vmpooler/metrics/promstats/collector_middleware' + + +describe Vmpooler::Metrics::Promstats::CollectorMiddleware do + include Rack::Test::Methods + + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new + end + + let(:registry) do + Prometheus::Client::Registry.new + end + + let(:original_app) do + ->(_) { [200, { 'Content-Type' => 'text/html' }, ['OK']] } + end + + let!(:app) do + described_class.new(original_app, registry: registry) + end + + let(:dummy_error) { RuntimeError.new("Dummy error from tests") } + + it 'returns the app response' do + get '/foo' + + expect(last_response).to be_ok + expect(last_response.body).to eql('OK') + end + + it 'handles errors in the registry gracefully' do + counter = registry.get(:http_server_requests_total) + expect(counter).to receive(:increment).and_raise(dummy_error) + + get '/foo' + + expect(last_response).to be_ok + end + + it 'traces request information' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.2) + + get '/foo' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.25" => 1) + end + + it 'normalizes paths cotaining /vm by default' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/foo/vm/bar-mumble-flame' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo/vm', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo/vm' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + it 'normalizes paths containing /ondemandvm by ' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + + get '/foo/ondemand/bar/fatman' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo/ondemand', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo/ondemand' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + context 'when the app raises an exception' do + let(:original_app) do + lambda do |env| + raise dummy_error if env['PATH_INFO'] == '/broken' + + [200, { 'Content-Type' => 'text/html' }, ['OK']] + end + end + + before do + get '/foo' + end + + it 'traces exceptions' do + expect { get '/broken' }.to raise_error RuntimeError + + metric = :http_server_exceptions_total + labels = { exception: 'RuntimeError' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + end + end + + context 'when provided a custom metrics_prefix' do + let!(:app) do + described_class.new( + original_app, + registry: registry, + metrics_prefix: 'lolrus', + ) + end + + it 'provides alternate metric names' do + expect( + registry.get(:lolrus_requests_total), + ).to be_a(Prometheus::Client::Counter) + expect( + registry.get(:lolrus_request_duration_seconds), + ).to be_a(Prometheus::Client::Histogram) + expect( + registry.get(:lolrus_exceptions_total), + ).to be_a(Prometheus::Client::Counter) + end + + it "doesn't register the default metrics" do + expect(registry.get(:http_server_requests_total)).to be(nil) + expect(registry.get(:http_server_request_duration_seconds)).to be(nil) + expect(registry.get(:http_server_exceptions_total)).to be(nil) + end + end +end diff --git a/spec/unit/generic_connection_pool_spec.rb b/spec/unit/generic_connection_pool_spec.rb index 350e6f5..5c24215 100644 --- a/spec/unit/generic_connection_pool_spec.rb +++ b/spec/unit/generic_connection_pool_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' describe 'GenericConnectionPool' do - let(:metrics) { Vmpooler::DummyStatsd.new } + let(:metrics) { Vmpooler::Metrics::DummyStatsd.new } let(:connpool_type) { 'test_connection_pool' } let(:connpool_provider) { 'testprovider' } let(:default_connpool_type) { 'connectionpool' }