Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 195 additions & 0 deletions test/integration/prometheus/network_costs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package prometheus

// Description - Compares Network Costs from Prometheus and Allocation

import (
// "fmt"
"github.com/opencost/opencost-integration-tests/pkg/api"
"github.com/opencost/opencost-integration-tests/pkg/prometheus"
"github.com/opencost/opencost-integration-tests/pkg/utils"
"slices"
"testing"
"time"
)

const tolerance = 0.05

func TestNetworkCosts(t *testing.T) {
apiObj := api.NewAPI()

testCases := []struct {
name string
window string
aggregate string
accumulate string
}{
{
name: "Yesterday",
window: "24h",
aggregate: "namespace",
accumulate: "false",
},
}

t.Logf("testCases: %v", testCases)

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {

// Any data that is in a "raw allocation only" is not valid in any
// sort of cumulative Allocation (like one that is added).

type NetworkCostsAggregate struct {
PromNetworkTransferBytes float64
PromNetworkReceiveBytes float64
Pods []string
AllocNetworkTransferBytes float64
AllocNetworkReceiveBytes float64
}
networkCostsNamespaceMap := make(map[string]*NetworkCostsAggregate)

queryEnd := time.Now().UTC().Truncate(time.Hour).Add(time.Hour)
endTime := queryEnd.Unix()
// Collect Namespace results from Prometheus
client := prometheus.NewClient()

////////////////////////////////////////////////////////////////////////////
// Network Receive Bytes

// sum(increase(container_network_receive_bytes_total{pod!=""}[24h:5m])) by (pod, namespace)
////////////////////////////////////////////////////////////////////////////

promNetworkReceiveInput := prometheus.PrometheusInput{
Metric: "container_network_receive_bytes_total",
}
promNetworkReceiveInput.IgnoreFilters = map[string][]string{
"pod": {""},
}
promNetworkReceiveInput.Function = []string{"increase", "sum"}
promNetworkReceiveInput.QueryWindow = tc.window
promNetworkReceiveInput.QueryResolution = "5m"
promNetworkReceiveInput.AggregateBy = []string{"pod", "namespace"}
promNetworkReceiveInput.Time = &endTime

promNetworkReceiveResponse, err := client.RunPromQLQuery(promNetworkReceiveInput)
if err != nil {
t.Fatalf("Error while calling Prometheus API %v", err)
}

////////////////////////////////////////////////////////////////////////////
// Network Transfer Bytes

// sum(increase(container_network_transmit_bytes_total{pod!="", %s}[%s:%dm])) by (pod_name, pod, namespace, %s)
////////////////////////////////////////////////////////////////////////////

promNetworkTransferInput := prometheus.PrometheusInput{
Metric: "container_network_transmit_bytes_total",
}
promNetworkTransferInput.IgnoreFilters = map[string][]string{
"pod": {""},
}
promNetworkTransferInput.Function = []string{"increase", "sum"}
promNetworkTransferInput.QueryWindow = tc.window
promNetworkTransferInput.QueryResolution = "5m"
promNetworkTransferInput.AggregateBy = []string{"pod", "namespace"}
promNetworkTransferInput.Time = &endTime

promNetworkTransferResponse, err := client.RunPromQLQuery(promNetworkTransferInput)
if err != nil {
t.Fatalf("Error while calling Prometheus API %v", err)
}

// Network Receive Bytes
for _, promNetworkReceiveResponse := range promNetworkReceiveResponse.Data.Result {
namespace := promNetworkReceiveResponse.Metric.Namespace
pod := promNetworkReceiveResponse.Metric.Pod
networkReceiveBytesPod := promNetworkReceiveResponse.Value.Value
networkCostsNamespace, ok := networkCostsNamespaceMap[namespace]
if !ok {
networkCostsNamespaceMap[namespace] = &NetworkCostsAggregate{
PromNetworkReceiveBytes: networkReceiveBytesPod,
PromNetworkTransferBytes: 0.0,
AllocNetworkReceiveBytes: 0.0,
AllocNetworkTransferBytes: 0.0,
Pods: []string{pod},
}
continue
}

networkCostsNamespace.Pods = append(networkCostsNamespace.Pods, pod)
networkCostsNamespace.PromNetworkReceiveBytes += networkReceiveBytesPod
}

// Network Transfer Bytes
for _, promNetworkTransferResponse := range promNetworkTransferResponse.Data.Result {
namespace := promNetworkTransferResponse.Metric.Namespace
pod := promNetworkTransferResponse.Metric.Pod
networkTransferBytesPod := promNetworkTransferResponse.Value.Value
networkCostsNamespace, ok := networkCostsNamespaceMap[namespace]
if !ok {
networkCostsNamespaceMap[namespace] = &NetworkCostsAggregate{
PromNetworkReceiveBytes: networkTransferBytesPod,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Wrong field assigned in network transfer initialization

When a namespace doesn't exist in the map during Network Transfer Bytes processing, PromNetworkReceiveBytes is incorrectly set to networkTransferBytesPod instead of 0.0. This causes the receive bytes value to be initialized with transfer bytes data, leading to incorrect aggregation and comparison results.

Fix in Cursor Fix in Web

PromNetworkTransferBytes: networkTransferBytesPod,
AllocNetworkReceiveBytes: 0.0,
AllocNetworkTransferBytes: 0.0,
Pods: []string{pod},
}
continue
}
if !slices.Contains(networkCostsNamespace.Pods, pod) {
networkCostsNamespace.Pods = append(networkCostsNamespace.Pods, pod)
}
networkCostsNamespace.PromNetworkTransferBytes += networkTransferBytesPod
}


/////////////////////////////////////////////
// API Client
/////////////////////////////////////////////

// Why doesn't allocation work on Namespace aggregate?
apiResponse, err := apiObj.GetAllocation(api.AllocationRequest{
Window: tc.window,
Aggregate: tc.aggregate,
Accumulate: tc.accumulate,
})

if err != nil {
t.Fatalf("Error while calling Allocation API %v", err)
}
if apiResponse.Code != 200 {
t.Errorf("API returned non-200 code")
}

for namespace, allocationResponseItem := range apiResponse.Data[0] {
networkCostsNamespace, ok := networkCostsNamespaceMap[namespace]
if !ok {
networkCostsNamespaceMap[namespace] = &NetworkCostsAggregate{
PromNetworkReceiveBytes: 0.0,
PromNetworkTransferBytes: 0.0,
AllocNetworkReceiveBytes: allocationResponseItem.NetworkReceiveBytes,
AllocNetworkTransferBytes: allocationResponseItem.NetworkTransferBytes,
}
continue
}
networkCostsNamespace.AllocNetworkReceiveBytes = allocationResponseItem.NetworkReceiveBytes
networkCostsNamespace.AllocNetworkTransferBytes = allocationResponseItem.NetworkTransferBytes
}

for namespace, networkCostValues := range networkCostsNamespaceMap {
t.Logf("Namespace %s", namespace)
withinRange, diff_percent := utils.AreWithinPercentage(networkCostValues.AllocNetworkTransferBytes, networkCostValues.PromNetworkTransferBytes, tolerance)
if !withinRange {
t.Errorf(" - NetworkTransferBytes[Fail]: DifferencePercent: %0.2f, Prometheus: %0.2f, /allocation: %0.2f", diff_percent, networkCostValues.PromNetworkTransferBytes, networkCostValues.AllocNetworkTransferBytes)
} else {
t.Logf(" - NetworkTransferBytes[Pass]: ~ %0.2f", networkCostValues.PromNetworkTransferBytes)
}
if !withinRange {
t.Errorf(" - NetworkReceiveBytes[Fail]: DifferencePercent: %0.2f, Prometheus: %0.2f, /allocation: %0.2f", diff_percent, networkCostValues.PromNetworkReceiveBytes, networkCostValues.AllocNetworkReceiveBytes)
} else {
t.Logf(" - NetworkReceiveBytes[Pass]: ~ %0.2f", networkCostValues.PromNetworkReceiveBytes)
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: NetworkReceiveBytes validation uses wrong comparison result

The withinRange variable from the NetworkTransferBytes comparison on line 181 is reused for NetworkReceiveBytes validation without recalculating it. This means NetworkReceiveBytes always reports the same pass/fail status as NetworkTransferBytes, regardless of the actual NetworkReceiveBytes values. The code needs to call utils.AreWithinPercentage again for NetworkReceiveBytes before line 187.

Fix in Cursor Fix in Web

}
})
}
}
180 changes: 180 additions & 0 deletions test/integration/prometheus/network_internet_costs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
package prometheus

// Description - Compares Network Internet Costs from Prometheus and Allocation

import (
// "fmt"
"github.com/opencost/opencost-integration-tests/pkg/api"
"github.com/opencost/opencost-integration-tests/pkg/prometheus"
"github.com/opencost/opencost-integration-tests/pkg/utils"
"testing"
"time"
)

const tolerance = 0.05

func TestNetworkInternetCosts(t *testing.T) {
apiObj := api.NewAPI()

testCases := []struct {
name string
window string
aggregate string
accumulate string
}{
{
name: "Yesterday",
window: "24h",
aggregate: "pod",
accumulate: "false",
},
}

t.Logf("testCases: %v", testCases)

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {

// Any data that is in a "raw allocation only" is not valid in any
// sort of cumulative Allocation (like one that is added).

type NetworkCostsAggregate struct {
PromNetworkInternetGiB float64
AllocNetworkInternetGiB float64
}

networkCostsPodMap := make(map[string]*NetworkCostsAggregate)

queryEnd := time.Now().UTC().Truncate(time.Hour).Add(time.Hour)
endTime := queryEnd.Unix()
// Collect Namespace results from Prometheus
client := prometheus.NewClient()

////////////////////////////////////////////////////////////////////////////
// Network Internet GiB

// sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[24h:5m])) by (pod_name, namespace) / 1024 / 1024 / 1024
// Apply Division by 1024^3 when you are looping over the response
////////////////////////////////////////////////////////////////////////////

promNetworkInternetInput := prometheus.PrometheusInput{
Metric: "kubecost_pod_network_egress_bytes_total",
}
promNetworkInternetInput.Filters = map[string]string{
"internet": "true",
}
promNetworkInternetInput.Function = []string{"increase", "sum"}
promNetworkInternetInput.QueryWindow = tc.window
promNetworkInternetInput.QueryResolution = "5m"
promNetworkInternetInput.AggregateBy = []string{"pod_name", "namespace"}
promNetworkInternetInput.Time = &endTime

promNetworkInternetResponse, err := client.RunPromQLQuery(promNetworkInternetInput)
if err != nil {
t.Fatalf("Error while calling Prometheus API %v", err)
}

////////////////////////////////////////////////////////////////////////////
// Network Internet price per GiB

// avg(avg_over_time(kubecost_network_internet_egress_cost{%s}[%s])) by (%s)
////////////////////////////////////////////////////////////////////////////

promNetworkInternetCostInput := prometheus.PrometheusInput{
Metric: "kubecost_network_internet_egress_cost",
}
promNetworkInternetCostInput.Function = []string{"avg_over_time", "avg"}
promNetworkInternetCostInput.QueryWindow = tc.window
promNetworkInternetCostInput.Time = &endTime

promNetworkInternetCostResponse, err := client.RunPromQLQuery(promNetworkInternetCostInput)
if err != nil {
t.Fatalf("Error while calling Prometheus API %v", err)
}

// --------------------------------
// Network Internet Cost for all Pods
// --------------------------------

networkInternetCost := promNetworkInternetCostResponse.Data.Result[0].Value.Value
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Potential panic from accessing empty result array

The code accesses Data.Result[0] without checking if the Result array is empty. If the Prometheus query returns no results, this causes a panic. This can happen when the network-costs daemonset isn't running or when there's no matching data for the query window.

Additional Locations (2)

Fix in Cursor Fix in Web


// --------------------------------
// Assign Network Costs to Pods and Cumulate based on Namespace
// --------------------------------

// Form a key based on namespace and pod name

for _, promNetworkInternetItem := range promNetworkInternetResponse.Data.Result {
// namespace := promNetworkInternetItem.Metric.Namespace
pod := promNetworkInternetItem.Metric.PodName
gib := promNetworkInternetItem.Value.Value


networkCostsPodMap[pod] = &NetworkCostsAggregate{
PromNetworkInternetGiB: (gib / 1024 / 1024 / 1024) * networkInternetCost,
AllocNetworkInternetGiB: 0.0,
}

// networkCostsNamespace, ok := networkCostsPodMap[namespace]
// if !ok {
// networkCostsPodMap[pod] = &NetworkCostsAggregate{
// PromNetworkInternetGiB: (gib / 1024 / 1024 / 1024) * networkInternetCost,
// AllocNetworkInternetGiB: 0.0,
// }
// continue
// }
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this commented?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted the option to drill down to a pod level if needed. do you think we need to go down further down?

}


/////////////////////////////////////////////
// API Client
/////////////////////////////////////////////

// Why doesn't allocation work on Namespace aggregate?
apiResponse, err := apiObj.GetAllocation(api.AllocationRequest{
Window: tc.window,
Aggregate: tc.aggregate,
Accumulate: tc.accumulate,
})

if err != nil {
t.Fatalf("Error while calling Allocation API %v", err)
}
if apiResponse.Code != 200 {
t.Errorf("API returned non-200 code")
}

for pod, allocationResponseItem := range apiResponse.Data[0] {
networkCostsPod, ok := networkCostsPodMap[pod]
if !ok {
networkCostsPodMap[pod] = &NetworkCostsAggregate{
PromNetworkInternetGiB: 0.0,
AllocNetworkInternetGiB: allocationResponseItem.NetworkInternetCost,
}
continue
}
networkCostsPod.AllocNetworkInternetGiB = allocationResponseItem.NetworkInternetCost
}

validCostsSeen := false
negligilbleCost := 0.01 // 1 Cent of a Dollar
for pod, networkCostValues := range networkCostsPodMap {
if networkCostValues.AllocNetworkInternetGiB < negligilbleCost {
continue
} else {
validCostsSeen = true
}
t.Logf("Pod %s", pod)
withinRange, diff_percent := utils.AreWithinPercentage(networkCostValues.AllocNetworkInternetGiB, networkCostValues.PromNetworkInternetGiB, tolerance)
if !withinRange {
t.Errorf(" - NetworkInternetCost[Fail]: DifferencePercent: %0.2f, Prometheus: %0.9f, /allocation: %0.9f", diff_percent, networkCostValues.PromNetworkInternetGiB, networkCostValues.AllocNetworkInternetGiB)
} else {
t.Logf(" - NetworkInternetCost[Pass]: ~ %0.5f", networkCostValues.PromNetworkInternetGiB)
}
}
if !validCostsSeen {
t.Errorf("NetWork Internet Costs for all Pods are below 1 cent and hence cannot be considered as costs from resource usage and validated.")
}
})
}
}
Loading
Loading