40 changes: 40 additions & 0 deletions collector/helper.go
@@ -1,6 +1,8 @@
package collector

import (
"strings"

"github.com/prometheus/client_golang/prometheus"
)

@@ -22,6 +24,33 @@ func newUpMetric(namespace string, constLabels map[string]string) prometheus.Gau
})
}

func newScrapeSuccessMetric(namespace string, constLabels map[string]string) prometheus.Gauge {
return prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "scrape_success",
Help: "Whether the last scrape of NGINX metrics was successful",
ConstLabels: constLabels,
})
}

func newScrapeDurationMetric(namespace string, constLabels map[string]string) prometheus.Gauge {
return prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "scrape_duration_seconds",
Help: "Duration of the last scrape in seconds",
ConstLabels: constLabels,
})
}

func newScrapeErrorsTotalMetric(namespace string, constLabels map[string]string) *prometheus.CounterVec {
return prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Name: "scrape_errors_total",
Help: "Total number of scrape errors by type",
ConstLabels: constLabels,
}, []string{"type"})
}

// MergeLabels merges two maps of labels.
func MergeLabels(a map[string]string, b map[string]string) map[string]string {
c := make(map[string]string)
@@ -35,3 +64,14 @@ func MergeLabels(a map[string]string, b map[string]string) map[string]string {

return c
}

func isNetworkError(errorMsg string) bool {
return strings.Contains(errorMsg, "failed to get") ||
strings.Contains(errorMsg, "connection") ||
strings.Contains(errorMsg, "timeout") ||
strings.Contains(errorMsg, "refused")
}

func isHTTPError(errorMsg string) bool {
return strings.Contains(errorMsg, "expected 200 response")
}
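
The two unexported helpers above classify a scrape error purely by substring matching on its message; handleScrapeError in collector/nginx.go then uses that classification to pick the type label on scrape_errors_total and to decide whether the up gauge should be set to nginxDown. A minimal standalone sketch of the same decision logic — the helpers are unexported, so this mirrors them outside the collector package, and the sample error strings are illustrative only:

package main

import (
	"fmt"
	"strings"
)

// classifyScrapeError mirrors isNetworkError/isHTTPError from collector/helper.go:
// network errors are checked first, then HTTP status errors; everything else is "parse".
func classifyScrapeError(errorMsg string) string {
	switch {
	case strings.Contains(errorMsg, "failed to get") ||
		strings.Contains(errorMsg, "connection") ||
		strings.Contains(errorMsg, "timeout") ||
		strings.Contains(errorMsg, "refused"):
		return "network"
	case strings.Contains(errorMsg, "expected 200 response"):
		return "http"
	default:
		return "parse"
	}
}

func main() {
	// Illustrative messages only; they follow the patterns used in the tests below.
	for _, msg := range []string{
		"failed to get http://localhost:8080/stub_status: connection refused",
		"expected 200 response, got 404",
		"failed to parse response body",
	} {
		fmt.Printf("%s -> %s\n", msg, classifyScrapeError(msg))
	}
}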
65 changes: 56 additions & 9 deletions collector/nginx.go
@@ -3,18 +3,22 @@ package collector
import (
"log/slog"
"sync"
"time"

"github.com/nginx/nginx-prometheus-exporter/client"
"github.com/prometheus/client_golang/prometheus"
)

// NginxCollector collects NGINX metrics. It implements prometheus.Collector interface.
type NginxCollector struct {
upMetric prometheus.Gauge
logger *slog.Logger
nginxClient *client.NginxClient
metrics map[string]*prometheus.Desc
mutex sync.Mutex
upMetric prometheus.Gauge
scrapeSuccessMetric prometheus.Gauge
scrapeDurationMetric prometheus.Gauge
scrapeErrorsTotal *prometheus.CounterVec
logger *slog.Logger
nginxClient *client.NginxClient
metrics map[string]*prometheus.Desc
mutex sync.Mutex
}

// NewNginxCollector creates an NginxCollector.
@@ -31,14 +35,20 @@ func NewNginxCollector(nginxClient *client.NginxClient, namespace string, constL
"connections_waiting": newGlobalMetric(namespace, "connections_waiting", "Idle client connections", constLabels),
"http_requests_total": newGlobalMetric(namespace, "http_requests_total", "Total http requests", constLabels),
},
upMetric: newUpMetric(namespace, constLabels),
upMetric: newUpMetric(namespace, constLabels),
scrapeSuccessMetric: newScrapeSuccessMetric(namespace, constLabels),
scrapeDurationMetric: newScrapeDurationMetric(namespace, constLabels),
scrapeErrorsTotal: newScrapeErrorsTotalMetric(namespace, constLabels),
}
}

// Describe sends the super-set of all possible descriptors of NGINX metrics
// to the provided channel.
func (c *NginxCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.upMetric.Desc()
ch <- c.scrapeSuccessMetric.Desc()
ch <- c.scrapeDurationMetric.Desc()
c.scrapeErrorsTotal.Describe(ch)

for _, m := range c.metrics {
ch <- m
@@ -50,16 +60,53 @@ func (c *NginxCollector) Collect(ch chan<- prometheus.Metric) {
c.mutex.Lock() // To protect metrics from concurrent collects
defer c.mutex.Unlock()

start := time.Now()
stats, err := c.nginxClient.GetStubStats()
duration := time.Since(start).Seconds()
c.scrapeDurationMetric.Set(duration)
ch <- c.scrapeDurationMetric

if err != nil {
c.upMetric.Set(nginxDown)
ch <- c.upMetric
c.logger.Error("error getting stats", "uri", c.nginxClient.GetAPIEndpoint(), "error", err)
c.handleScrapeError(ch, err)
return
}

c.handleScrapeSuccess(ch, stats)
}

func (c *NginxCollector) handleScrapeError(ch chan<- prometheus.Metric, err error) {
errorMsg := err.Error()
var errorType string

switch {
case isNetworkError(errorMsg):
c.upMetric.Set(nginxDown)
errorType = "network"
case isHTTPError(errorMsg):
c.upMetric.Set(nginxUp)
errorType = "http"
default:
c.upMetric.Set(nginxUp)
errorType = "parse"
}

c.scrapeErrorsTotal.WithLabelValues(errorType).Inc()
c.scrapeSuccessMetric.Set(0)

ch <- c.upMetric
ch <- c.scrapeSuccessMetric
c.scrapeErrorsTotal.Collect(ch)

c.logger.Error("error getting stats", "error", err.Error(), "type", errorType)
}

func (c *NginxCollector) handleScrapeSuccess(ch chan<- prometheus.Metric, stats *client.StubStats) {
c.upMetric.Set(nginxUp)
c.scrapeSuccessMetric.Set(1)

ch <- c.upMetric
ch <- c.scrapeSuccessMetric
c.scrapeErrorsTotal.Collect(ch)

ch <- prometheus.MustNewConstMetric(c.metrics["connections_active"],
prometheus.GaugeValue, float64(stats.Connections.Active))
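
Taken together, Collect now times GetStubStats, always emits the duration gauge, and then routes to handleScrapeError or handleScrapeSuccess. Assuming the default nginx namespace, no const labels, and illustrative sample values, the new self-monitoring series would appear on the exporter's /metrics page roughly as follows (a type= child of the counter only shows up after at least one error of that type has been counted; one earlier network failure is assumed here):

# HELP nginx_scrape_duration_seconds Duration of the last scrape in seconds
# TYPE nginx_scrape_duration_seconds gauge
nginx_scrape_duration_seconds 0.002
# HELP nginx_scrape_errors_total Total number of scrape errors by type
# TYPE nginx_scrape_errors_total counter
nginx_scrape_errors_total{type="network"} 1
# HELP nginx_scrape_success Whether the last scrape of NGINX metrics was successful
# TYPE nginx_scrape_success gauge
nginx_scrape_success 1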
142 changes: 142 additions & 0 deletions collector/nginx_test.go
@@ -0,0 +1,142 @@
package collector

import (
"strings"
"testing"

"github.com/prometheus/client_golang/prometheus"
)

func TestIsNetworkError(t *testing.T) {
t.Parallel()

tests := []struct {
name string
errorMsg string
want bool
}{
{
name: "network connection error",
errorMsg: "failed to get http://localhost:8080/stub_status: connection refused",
want: true,
},
{
name: "network timeout error",
errorMsg: "failed to get http://localhost:8080/stub_status: timeout",
want: true,
},
{
name: "HTTP error",
errorMsg: "expected 200 response, got 404",
want: false,
},
{
name: "parse error",
errorMsg: "failed to parse response body",
want: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
if got := isNetworkError(tt.errorMsg); got != tt.want {
t.Errorf("isNetworkError() = %v, want %v", got, tt.want)
}
})
}
}

func TestIsHTTPError(t *testing.T) {
t.Parallel()

tests := []struct {
name string
errorMsg string
want bool
}{
{
name: "HTTP 404 error",
errorMsg: "expected 200 response, got 404",
want: true,
},
{
name: "HTTP 500 error",
errorMsg: "expected 200 response, got 500",
want: true,
},
{
name: "network error",
errorMsg: "failed to get http://localhost:8080/stub_status: connection refused",
want: false,
},
{
name: "parse error",
errorMsg: "failed to parse response body",
want: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
if got := isHTTPError(tt.errorMsg); got != tt.want {
t.Errorf("isHTTPError() = %v, want %v", got, tt.want)
}
})
}
}

func TestNewScrapeSuccessMetric(t *testing.T) {
t.Parallel()

metric := newScrapeSuccessMetric("nginx", map[string]string{"job": "nginx"})

if metric == nil {
t.Error("newScrapeSuccessMetric() returned nil")
}

desc := metric.Desc().String()
if !strings.Contains(desc, "nginx_scrape_success") {
t.Errorf("metric description should contain 'nginx_scrape_success', got: %s", desc)
}
}

func TestNewScrapeDurationMetric(t *testing.T) {
t.Parallel()

metric := newScrapeDurationMetric("nginx", map[string]string{"job": "nginx"})

if metric == nil {
t.Error("newScrapeDurationMetric() returned nil")
}

desc := metric.Desc().String()
if !strings.Contains(desc, "nginx_scrape_duration_seconds") {
t.Errorf("metric description should contain 'nginx_scrape_duration_seconds', got: %s", desc)
}
}

func TestNewScrapeErrorsTotalMetric(t *testing.T) {
t.Parallel()

metric := newScrapeErrorsTotalMetric("nginx", map[string]string{"job": "nginx"})

if metric == nil {
t.Error("newScrapeErrorsTotalMetric() returned nil")
return
}

ch := make(chan *prometheus.Desc, 10)
metric.Describe(ch)
close(ch)

count := 0
for range ch {
count++
}

if count == 0 {
t.Error("metric should have descriptions")
}
}
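
The constructor tests above only inspect descriptors; the values themselves could also be asserted with client_golang's testutil helpers. A hypothetical additional test along these lines (the test name and sample values are made up; testutil.ToFloat64 reads the value of a collector that exposes exactly one series, which is why the counter is read after creating a single type="network" child):

package collector

import (
	"testing"

	"github.com/prometheus/client_golang/prometheus/testutil"
)

func TestScrapeMetricValues(t *testing.T) {
	t.Parallel()

	success := newScrapeSuccessMetric("nginx", nil)
	duration := newScrapeDurationMetric("nginx", nil)
	errorsTotal := newScrapeErrorsTotalMetric("nginx", nil)

	// Simulate one successful scrape that took 0.5s, after one earlier network error.
	success.Set(1)
	duration.Set(0.5)
	errorsTotal.WithLabelValues("network").Inc()

	if got := testutil.ToFloat64(success); got != 1 {
		t.Errorf("scrape_success = %v, want 1", got)
	}
	if got := testutil.ToFloat64(duration); got != 0.5 {
		t.Errorf("scrape_duration_seconds = %v, want 0.5", got)
	}
	if got := testutil.ToFloat64(errorsTotal); got != 1 {
		t.Errorf(`scrape_errors_total{type="network"} = %v, want 1`, got)
	}
}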