(POOLER-177) Filter hostname from API Paths

Use the example provided in the Ruby Client to provide a customised
collector appropriate to log all calls to the API. The customised
filtering is used to replace individual node names and templates
for the /vm and request ID's for the /ondemand endpoints.

This module was failing our rubocop checks so have updated it since
it now forms part of vmpooler.

Separate trapping for litmus jobs is also included so that they don't
interfere with stats from the jenkins pipelines.
This commit is contained in:
John O'Connor 2020-06-17 22:32:56 +01:00
parent b6dcd77228
commit cb955a1bed
7 changed files with 277 additions and 6 deletions

View file

@ -15,9 +15,6 @@ module Vmpooler
require 'timeout'
require 'yaml'
require 'prometheus/middleware/collector'
require 'prometheus/middleware/exporter'
%w[api metrics logger pool_manager generic_connection_pool].each do |lib|
require "vmpooler/#{lib}"
end

View file

@ -35,7 +35,10 @@ module Vmpooler
# in the config file.
metrics.setup_prometheus_metrics
use Prometheus::Middleware::Collector, metrics_prefix: "#{metrics.metrics_prefix}_http"
# Using customised collector that filters out hostnames on API paths
require 'vmpooler/metrics/promstats/collector_middleware'
require 'prometheus/middleware/exporter'
use Vmpooler::Metrics::Promstats::CollectorMiddleware, metrics_prefix: "#{metrics.metrics_prefix}_http"
use Prometheus::Middleware::Exporter, path: metrics.endpoint
end

View file

@ -809,6 +809,7 @@ module Vmpooler
post "#{api_prefix}/ondemandvm/?" do
content_type :json
metrics.increment('api_vm.post.ondemand.requestid')
need_token! if Vmpooler::API.settings.config[:auth]
@ -846,6 +847,7 @@ module Vmpooler
post "#{api_prefix}/ondemandvm/:template/?" do
content_type :json
result = { 'ok' => false }
metrics.increment('api_vm.delete.ondemand.template')
need_token! if Vmpooler::API.settings.config[:auth]
@ -872,6 +874,7 @@ module Vmpooler
get "#{api_prefix}/ondemandvm/:requestid/?" do
content_type :json
metrics.increment('api_vm.get.ondemand.request')
status 404
result = check_ondemand_request(params[:requestid])
@ -882,6 +885,7 @@ module Vmpooler
delete "#{api_prefix}/ondemandvm/:requestid/?" do
content_type :json
need_token! if Vmpooler::API.settings.config[:auth]
metrics.increment('api_vm.delete.ondemand.request')
status 404
result = delete_ondemand_request(params[:requestid])
@ -892,6 +896,7 @@ module Vmpooler
post "#{api_prefix}/vm/?" do
content_type :json
result = { 'ok' => false }
metrics.increment('api_vm.post.vm.checkout')
payload = JSON.parse(request.body.read)
@ -1034,6 +1039,7 @@ module Vmpooler
post "#{api_prefix}/vm/:template/?" do
content_type :json
result = { 'ok' => false }
metrics.increment('api_vm.get.vm.template')
payload = extract_templates_from_query_params(params[:template])
@ -1057,6 +1063,7 @@ module Vmpooler
get "#{api_prefix}/vm/:hostname/?" do
content_type :json
metrics.increment('api_vm.get.vm.hostname')
result = {}
@ -1129,6 +1136,7 @@ module Vmpooler
delete "#{api_prefix}/vm/:hostname/?" do
content_type :json
metrics.increment('api_vm.delete.vm.hostname')
result = {}
@ -1156,6 +1164,7 @@ module Vmpooler
put "#{api_prefix}/vm/:hostname/?" do
content_type :json
metrics.increment('api_vm.put.vm.modify')
status 404
result = { 'ok' => false }
@ -1232,6 +1241,7 @@ module Vmpooler
post "#{api_prefix}/vm/:hostname/disk/:size/?" do
content_type :json
metrics.increment('api_vm.post.vm.disksize')
need_token! if Vmpooler::API.settings.config[:auth]
@ -1255,6 +1265,7 @@ module Vmpooler
post "#{api_prefix}/vm/:hostname/snapshot/?" do
content_type :json
metrics.increment('api_vm.post.vm.snapshot')
need_token! if Vmpooler::API.settings.config[:auth]
@ -1280,6 +1291,7 @@ module Vmpooler
post "#{api_prefix}/vm/:hostname/snapshot/:snapshot/?" do
content_type :json
metrics.increment('api_vm.post.vm.disksize')
need_token! if Vmpooler::API.settings.config[:auth]

View file

@ -0,0 +1,119 @@
# frozen_string_literal: true
# This is an adapted Collector module for vmpooler based on the sample implementation
# available in the prometheus client_ruby library
# https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/middleware/collector.rb
#
# The code was also failing Rubocop on PR check, so have addressed all the offenses.
#
# The method strip_ids_from_path has been adapted to add a match for hostnames in paths
# to replace with a single ":hostname" string to avoid proliferation of stat lines for
# each new vm hostname deleted, modified or otherwise queried.
require 'benchmark'
require 'prometheus/client'
require 'vmpooler/logger'
module Vmpooler
class Metrics
class Promstats
# CollectorMiddleware is an implementation of Rack Middleware customised
# for vmpooler use.
#
# By default metrics are registered on the global registry. Set the
# `:registry` option to use a custom registry.
#
# By default metrics all have the prefix "http_server". Set to something
# else if you like.
#
# The request counter metric is broken down by code, method and path by
# default. Set the `:counter_label_builder` option to use a custom label
# builder.
#
# The request duration metric is broken down by method and path by default.
# Set the `:duration_label_builder` option to use a custom label builder.
#
# Label Builder functions will receive a Rack env and a status code, and must
# return a hash with the labels for that request. They must also accept an empty
# env, and return a hash with the correct keys. This is necessary to initialize
# the metrics with the correct set of labels.
class CollectorMiddleware
attr_reader :app, :registry
def initialize(app, options = {})
@app = app
@registry = options[:registry] || Prometheus::Client.registry
@metrics_prefix = options[:metrics_prefix] || 'http_server'
init_request_metrics
init_exception_metrics
end
def call(env) # :nodoc:
trace(env) { @app.call(env) }
end
protected
def init_request_metrics
@requests = @registry.counter(
:"#{@metrics_prefix}_requests_total",
docstring:
'The total number of HTTP requests handled by the Rack application.',
labels: %i[code method path]
)
@durations = @registry.histogram(
:"#{@metrics_prefix}_request_duration_seconds",
docstring: 'The HTTP response duration of the Rack application.',
labels: %i[method path]
)
end
def init_exception_metrics
@exceptions = @registry.counter(
:"#{@metrics_prefix}_exceptions_total",
docstring: 'The total number of exceptions raised by the Rack application.',
labels: [:exception]
)
end
def trace(env)
response = nil
duration = Benchmark.realtime { response = yield }
record(env, response.first.to_s, duration)
response
rescue StandardError => e
@exceptions.increment(labels: { exception: e.class.name })
raise
end
def record(env, code, duration)
counter_labels = {
code: code,
method: env['REQUEST_METHOD'].downcase,
path: strip_ids_from_path(env['PATH_INFO'])
}
duration_labels = {
method: env['REQUEST_METHOD'].downcase,
path: strip_ids_from_path(env['PATH_INFO'])
}
@requests.increment(labels: counter_labels)
@durations.observe(duration, labels: duration_labels)
rescue # rubocop:disable Style/RescueStandardError
nil
end
def strip_ids_from_path(path)
# Custom for /vm path - so we just collect aggrate stats for all usage along this one
# path. Custom counters are then added more specific endpoints in v1.rb
# Since we aren't parsing UID/GIDs as in the original example, these are removed.
path
.gsub(%r{/vm/.+$}, '/vm')
.gsub(%r{/ondemand/.+$}, '/ondemand')
end
end
end
end
end

View file

@ -496,7 +496,12 @@ module Vmpooler
return unless jenkins_build_url
# TBD - Add Filter for Litmus here as well - to ignore for the moment.
if jenkins_build_url.include? 'litmus'
# Very simple filter for Litmus jobs - just count them coming through for the moment.
$metrics.increment("usage_litmus.#{user}.#{poolname}")
return
end
url_parts = jenkins_build_url.split('/')[2..-1]
jenkins_instance = url_parts[0].gsub('.', '_')
value_stream_parts = url_parts[2].split('_')

View file

@ -0,0 +1,135 @@
require 'rack/test'
require 'vmpooler/metrics/promstats/collector_middleware'
describe Vmpooler::Metrics::Promstats::CollectorMiddleware do
include Rack::Test::Methods
# Reset the data store
before do
Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new
end
let(:registry) do
Prometheus::Client::Registry.new
end
let(:original_app) do
->(_) { [200, { 'Content-Type' => 'text/html' }, ['OK']] }
end
let!(:app) do
described_class.new(original_app, registry: registry)
end
let(:dummy_error) { RuntimeError.new("Dummy error from tests") }
it 'returns the app response' do
get '/foo'
expect(last_response).to be_ok
expect(last_response.body).to eql('OK')
end
it 'handles errors in the registry gracefully' do
counter = registry.get(:http_server_requests_total)
expect(counter).to receive(:increment).and_raise(dummy_error)
get '/foo'
expect(last_response).to be_ok
end
it 'traces request information' do
expect(Benchmark).to receive(:realtime).and_yield.and_return(0.2)
get '/foo'
metric = :http_server_requests_total
labels = { method: 'get', path: '/foo', code: '200' }
expect(registry.get(metric).get(labels: labels)).to eql(1.0)
metric = :http_server_request_duration_seconds
labels = { method: 'get', path: '/foo' }
expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.25" => 1)
end
it 'normalizes paths cotaining /vm by default' do
expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3)
get '/foo/vm/bar-mumble-flame'
metric = :http_server_requests_total
labels = { method: 'get', path: '/foo/vm', code: '200' }
expect(registry.get(metric).get(labels: labels)).to eql(1.0)
metric = :http_server_request_duration_seconds
labels = { method: 'get', path: '/foo/vm' }
expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1)
end
it 'normalizes paths containing /ondemandvm by ' do
expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3)
get '/foo/ondemand/bar/fatman'
metric = :http_server_requests_total
labels = { method: 'get', path: '/foo/ondemand', code: '200' }
expect(registry.get(metric).get(labels: labels)).to eql(1.0)
metric = :http_server_request_duration_seconds
labels = { method: 'get', path: '/foo/ondemand' }
expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1)
end
context 'when the app raises an exception' do
let(:original_app) do
lambda do |env|
raise dummy_error if env['PATH_INFO'] == '/broken'
[200, { 'Content-Type' => 'text/html' }, ['OK']]
end
end
before do
get '/foo'
end
it 'traces exceptions' do
expect { get '/broken' }.to raise_error RuntimeError
metric = :http_server_exceptions_total
labels = { exception: 'RuntimeError' }
expect(registry.get(metric).get(labels: labels)).to eql(1.0)
end
end
context 'when provided a custom metrics_prefix' do
let!(:app) do
described_class.new(
original_app,
registry: registry,
metrics_prefix: 'lolrus',
)
end
it 'provides alternate metric names' do
expect(
registry.get(:lolrus_requests_total),
).to be_a(Prometheus::Client::Counter)
expect(
registry.get(:lolrus_request_duration_seconds),
).to be_a(Prometheus::Client::Histogram)
expect(
registry.get(:lolrus_exceptions_total),
).to be_a(Prometheus::Client::Counter)
end
it "doesn't register the default metrics" do
expect(registry.get(:http_server_requests_total)).to be(nil)
expect(registry.get(:http_server_request_duration_seconds)).to be(nil)
expect(registry.get(:http_server_exceptions_total)).to be(nil)
end
end
end

View file

@ -1,7 +1,7 @@
require 'spec_helper'
describe 'GenericConnectionPool' do
let(:metrics) { Vmpooler::DummyStatsd.new }
let(:metrics) { Vmpooler::Metrics::DummyStatsd.new }
let(:connpool_type) { 'test_connection_pool' }
let(:connpool_provider) { 'testprovider' }
let(:default_connpool_type) { 'connectionpool' }