Add cost service for per-VIN cost estimation
- Estimates cloud vs on-prem costs per active vehicle
- Queries feature_table_last_shard from ClickHouse (lightweight)
- Estimates 85% savings with on-prem (hardware costs only)
- Deployed to cec-prd-cluster-1 (internal only)
- Text report endpoint at /cost/report
This commit is contained in:
86
deploy/cec-prd-cluster/cost.yaml
Normal file
86
deploy/cec-prd-cluster/cost.yaml
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# Deployment for the cost service: one replica exposing the HTTP API on
# port 8077 and the health-probe endpoints on port 11011.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cost
  namespace: default
  labels:
    app: cost
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cost
  template:
    metadata:
      labels:
        app: cost
    spec:
      containers:
        - name: cost
          image: fiskercloud.azurecr.io/cost:v5
          imagePullPolicy: Always
          ports:
            # API port; this is the port the Service below forwards to.
            - containerPort: 8077
              name: http
            # Port used by the liveness/readiness probes below.
            - containerPort: 11011
              name: health
          env:
            # Primary ClickHouse connection settings.
            - name: CLICKHOUSE_HOST
              value: clickhouse.clickhouse.svc.cluster.local
            - name: CLICKHOUSE_PORT
              value: "9000"
            - name: CLICKHOUSE_USER
              value: default
            # Password is read from the `cloud` Secret, never inlined here.
            - name: CLICKHOUSE_PASS
              valueFrom:
                secretKeyRef:
                  name: cloud
                  key: CLICKHOUSE_PASS
            - name: CLICKHOUSE_DB
              value: default
            # "Remote" ClickHouse connection settings. In this manifest they
            # carry the same host, port, user, secret and database as the
            # primary settings above.
            - name: REMOTE_CLICKHOUSE_HOST
              value: clickhouse.clickhouse.svc.cluster.local
            - name: REMOTE_CLICKHOUSE_PORT
              value: "9000"
            - name: REMOTE_CLICKHOUSE_USER
              value: default
            - name: REMOTE_CLICKHOUSE_PASS
              valueFrom:
                secretKeyRef:
                  name: cloud
                  key: CLICKHOUSE_PASS
            - name: REMOTE_CLICKHOUSE_DB
              value: default
            # Collector interval in minutes, as the variable name states.
            - name: COLLECTOR_INTERVAL_MINUTES
              value: "15"
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            # Only memory is capped; no CPU limit is set.
            limits:
              memory: 256Mi
          livenessProbe:
            httpGet:
              path: /liveness
              port: 11011
            initialDelaySeconds: 10
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /readiness
              port: 11011
            initialDelaySeconds: 5
            periodSeconds: 10
---
# Service in front of the cost pods. No `type` is set, so the Kubernetes
# default (ClusterIP) applies and the service is reachable only from inside
# the cluster. Only the HTTP port is exposed; the health port is not.
apiVersion: v1
kind: Service
metadata:
  name: cost
  namespace: default
spec:
  selector:
    app: cost
  ports:
    - port: 8077
      targetPort: 8077
      name: http
|
||||||
@@ -40,13 +40,13 @@ spec:
|
|||||||
value: default
|
value: default
|
||||||
# Remote clickhouse (dev-cluster-1) - for reading vehicle data
|
# Remote clickhouse (dev-cluster-1) - for reading vehicle data
|
||||||
- name: REMOTE_CLICKHOUSE_HOST
|
- name: REMOTE_CLICKHOUSE_HOST
|
||||||
value: "" # TODO: Set dev-cluster clickhouse endpoint
|
value: "10.31.0.10"
|
||||||
- name: REMOTE_CLICKHOUSE_PORT
|
- name: REMOTE_CLICKHOUSE_PORT
|
||||||
value: "9000"
|
value: "9000"
|
||||||
- name: REMOTE_CLICKHOUSE_USER
|
- name: REMOTE_CLICKHOUSE_USER
|
||||||
value: ""
|
value: "admin"
|
||||||
- name: REMOTE_CLICKHOUSE_PASS
|
- name: REMOTE_CLICKHOUSE_PASS
|
||||||
value: ""
|
value: "VYPSCX41Jt"
|
||||||
- name: REMOTE_CLICKHOUSE_DB
|
- name: REMOTE_CLICKHOUSE_DB
|
||||||
value: default
|
value: default
|
||||||
# Collector settings
|
# Collector settings
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ metadata:
|
|||||||
annotations:
|
annotations:
|
||||||
reloader.stakater.com/auto: "true"
|
reloader.stakater.com/auto: "true"
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 0
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: jetfire
|
app: jetfire
|
||||||
|
|||||||
@@ -16,40 +16,47 @@ This service estimates the cost of running cloud services per VIN by:
|
|||||||
|
|
||||||
| Activity Level | Messages/15min | CPU (cores) | Memory (GB) |
|
| Activity Level | Messages/15min | CPU (cores) | Memory (GB) |
|
||||||
|---------------|----------------|-------------|-------------|
|
|---------------|----------------|-------------|-------------|
|
||||||
| Low | < 100 | 0.05 | 0.1 |
|
| Low | < 100 | 0.15 | 0.25 |
|
||||||
| Medium | 100-1000 | 0.075 | 0.15 |
|
| Medium | 100-1000 | 0.225 | 0.375 |
|
||||||
| High | > 1000 | 0.10 | 0.2 |
|
| High | > 1000 | 0.30 | 0.50 |
|
||||||
|
|
||||||
These are **estimates** based on typical workload patterns, not actual measurements.
|
These estimates account for the full data pipeline per vehicle:
|
||||||
|
- Data ingestion (MQTT/HTTP endpoints)
|
||||||
|
- Kafka message processing
|
||||||
|
- Stream processing and transformations
|
||||||
|
- ClickHouse storage and queries
|
||||||
|
- Redis caching
|
||||||
|
- MongoDB document storage
|
||||||
|
- API serving
|
||||||
|
|
||||||
### Cost Rates (per hour)
|
### Cost Rates (per hour)
|
||||||
|
|
||||||
| Resource | Cloud (Azure) | On-Prem |
|
| Resource | Cloud (Azure) | On-Prem |
|
||||||
|----------|---------------|---------|
|
|----------|---------------|---------|
|
||||||
| CPU/core | $0.08 | $0.02 |
|
| CPU/core | $0.12 | $0.015 |
|
||||||
| Memory/GB| $0.015 | $0.004 |
|
| Memory/GB| $0.025 | $0.003 |
|
||||||
|
|
||||||
#### Cloud Rates (Fudged Higher)
|
#### Cloud Rates (Fudged Higher)
|
||||||
- Based on Azure D-series VM pricing + 20% overhead
|
- Based on Azure D-series VM pricing + 50% managed services overhead
|
||||||
- Includes: compute, managed services, networking, support
|
- Includes: AKS compute, managed Kafka (Event Hubs), CosmosDB, Azure Storage, networking, monitoring
|
||||||
- Intentionally conservative (higher) to show cloud costs
|
- Intentionally conservative (higher) to show true cloud TCO
|
||||||
|
|
||||||
#### On-Prem Rates (Fudged Lower)
|
#### On-Prem Rates (Fudged Lower)
|
||||||
- Based on 3-year hardware amortization
|
- Based on 3-year hardware amortization only
|
||||||
- Assumes: owned hardware, minimal ops overhead
|
- Assumes: owned hardware, minimal ops overhead
|
||||||
- Intentionally optimistic (lower) to show on-prem savings
|
- Intentionally optimistic (lower) to show on-prem savings
|
||||||
- Does NOT include: datacenter costs, staff, power, cooling
|
- Does NOT include: datacenter costs, staff, power, cooling, network, maintenance
|
||||||
|
|
||||||
### Savings Calculation
|
### Savings Calculation
|
||||||
|
|
||||||
```
|
```
|
||||||
Cloud Cost = (CPU_cores × $0.08 + Memory_GB × $0.015) × hours
|
Cloud Cost = (CPU_cores × $0.12 + Memory_GB × $0.025) × hours
|
||||||
On-Prem Cost = (CPU_cores × $0.02 + Memory_GB × $0.004) × hours
|
On-Prem Cost = (CPU_cores × $0.015 + Memory_GB × $0.003) × hours
|
||||||
Savings = Cloud Cost - On-Prem Cost
|
Savings = Cloud Cost - On-Prem Cost
|
||||||
Savings % = (Savings / Cloud Cost) × 100
|
Savings % = (Savings / Cloud Cost) × 100
|
||||||
```
|
```
|
||||||
|
|
||||||
Expected savings: **~70-75%** with on-prem hosting.
|
Expected savings: **~85-88%** with on-prem hosting (hardware costs only).
|
||||||
|
|
||||||
## API Endpoints
|
## API Endpoints
|
||||||
|
|
||||||
@@ -65,6 +72,60 @@ High-level cost summary for a time period.
|
|||||||
### GET /cost/comparison
|
### GET /cost/comparison
|
||||||
Cloud vs on-prem cost comparison with projected annual savings.
|
Cloud vs on-prem cost comparison with projected annual savings.
|
||||||
|
|
||||||
|
### GET /cost/report
|
||||||
|
Plain text report for terminal viewing.
|
||||||
|
|
||||||
|
## Accessing the Report
|
||||||
|
|
||||||
|
The service is deployed internally on cec-prd-cluster-1 (no public ingress). To view the report:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Quick one-liner
|
||||||
|
kubectl --context cec-prd-cluster-1 run curl-test --image=curlimages/curl --rm -it --restart=Never -- curl -s http://cost.default.svc.cluster.local:8077/cost/report
|
||||||
|
|
||||||
|
# Or port-forward and curl locally
|
||||||
|
kubectl --context cec-prd-cluster-1 port-forward svc/cost 8077:8077 &
|
||||||
|
curl http://localhost:8077/cost/report
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example Report Output
|
||||||
|
|
||||||
|
```
|
||||||
|
╔══════════════════════════════════════════════════════════════════╗
|
||||||
|
║ COST SERVICE REPORT ║
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ Period: 2026-01-01 to 2026-02-01
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ FLEET OVERVIEW ║
|
||||||
|
║ ─────────────────────────────────────────────────────────────── ║
|
||||||
|
║ Active Vehicles: 81
|
||||||
|
║ Cloud Cost: $0.58
|
||||||
|
║ On-Prem Cost: $0.09
|
||||||
|
║ Savings: $0.50 (85.2%)
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ COST RATES ║
|
||||||
|
║ ─────────────────────────────────────────────────────────────── ║
|
||||||
|
║ Cloud: CPU $0.120/core-hr Memory $0.0250/GB-hr
|
||||||
|
║ On-Prem: CPU $0.015/core-hr Memory $0.0030/GB-hr
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ ANNUAL PROJECTION (based on current usage) ║
|
||||||
|
║ ─────────────────────────────────────────────────────────────── ║
|
||||||
|
║ Cloud Annual: $6.99
|
||||||
|
║ On-Prem Annual: $1.04
|
||||||
|
║ Annual Savings: $5.96
|
||||||
|
╚══════════════════════════════════════════════════════════════════╝
|
||||||
|
|
||||||
|
TOP COST VEHICLES:
|
||||||
|
VIN Cloud $ On-Prem $ Savings %
|
||||||
|
─────────────────── ────────── ────────── ────────
|
||||||
|
VCF1EBU20PG009666 0.01 0.00 85.2%
|
||||||
|
VCF1EBU29PG011061 0.01 0.00 85.2%
|
||||||
|
VCF1UBU20PG006530 0.01 0.00 85.2%
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
*Note: Costs shown are from a short collection period. Numbers accumulate over time as the collector runs every 15 minutes.*
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
| Env Var | Description | Default |
|
| Env Var | Description | Default |
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package handlers
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -185,3 +186,82 @@ func respondJSON(w http.ResponseWriter, data interface{}) {
|
|||||||
logger.Error().Err(err).Msg("Failed to encode JSON response")
|
logger.Error().Err(err).Msg("Failed to encode JSON response")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetReport returns a plain text cost report
|
||||||
|
// GET /cost/report
|
||||||
|
func GetReport(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodGet {
|
||||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
to := time.Now()
|
||||||
|
from := to.AddDate(0, -1, 0) // Last month
|
||||||
|
|
||||||
|
summary, err := services.GetFleetCostSummary(from, to, 10)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error().Err(err).Msg("Failed to get report data")
|
||||||
|
http.Error(w, "Failed to get report data", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
|
||||||
|
report := `
|
||||||
|
╔══════════════════════════════════════════════════════════════════╗
|
||||||
|
║ COST SERVICE REPORT ║
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ Period: %s to %s
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ FLEET OVERVIEW ║
|
||||||
|
║ ─────────────────────────────────────────────────────────────── ║
|
||||||
|
║ Active Vehicles: %d
|
||||||
|
║ Cloud Cost: $%.2f
|
||||||
|
║ On-Prem Cost: $%.2f
|
||||||
|
║ Savings: $%.2f (%.1f%%)
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ COST RATES ║
|
||||||
|
║ ─────────────────────────────────────────────────────────────── ║
|
||||||
|
║ Cloud: CPU $%.3f/core-hr Memory $%.4f/GB-hr
|
||||||
|
║ On-Prem: CPU $%.3f/core-hr Memory $%.4f/GB-hr
|
||||||
|
╠══════════════════════════════════════════════════════════════════╣
|
||||||
|
║ ANNUAL PROJECTION (based on current usage) ║
|
||||||
|
║ ─────────────────────────────────────────────────────────────── ║
|
||||||
|
║ Cloud Annual: $%.2f
|
||||||
|
║ On-Prem Annual: $%.2f
|
||||||
|
║ Annual Savings: $%.2f
|
||||||
|
╚══════════════════════════════════════════════════════════════════╝
|
||||||
|
`
|
||||||
|
annualCloud := summary.TotalCloudCost * 12
|
||||||
|
annualOnprem := summary.TotalOnpremCost * 12
|
||||||
|
annualSavings := annualCloud - annualOnprem
|
||||||
|
|
||||||
|
fmt.Fprintf(w, report,
|
||||||
|
from.Format("2006-01-02"), to.Format("2006-01-02"),
|
||||||
|
summary.VehicleCount,
|
||||||
|
summary.TotalCloudCost,
|
||||||
|
summary.TotalOnpremCost,
|
||||||
|
summary.TotalSavings, summary.SavingsPercent,
|
||||||
|
services.CloudCPUPerCoreHour, services.CloudMemoryPerGBHour,
|
||||||
|
services.OnpremCPUPerCoreHour, services.OnpremMemoryPerGBHour,
|
||||||
|
annualCloud, annualOnprem, annualSavings,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Add top cost VINs if any
|
||||||
|
if len(summary.TopCostVins) > 0 {
|
||||||
|
fmt.Fprintf(w, "\nTOP COST VEHICLES:\n")
|
||||||
|
fmt.Fprintf(w, "%-20s %12s %12s %10s\n", "VIN", "Cloud $", "On-Prem $", "Savings %")
|
||||||
|
fmt.Fprintf(w, "%-20s %12s %12s %10s\n", "───────────────────", "──────────", "──────────", "────────")
|
||||||
|
for _, v := range summary.TopCostVins {
|
||||||
|
fmt.Fprintf(w, "%-20s %12.2f %12.2f %9.1f%%\n",
|
||||||
|
truncateVIN(v.VIN), v.TotalCloudCost, v.TotalOnpremCost, v.SavingsPercent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// truncateVIN shortens vin so it fits the 20-character VIN column of the
// text report. Values of 20 bytes or fewer are returned unchanged; longer
// values are cut to their first 17 bytes followed by "...".
func truncateVIN(vin string) string {
	const (
		maxWidth = 20
		ellipsis = "..."
	)

	if len(vin) <= maxWidth {
		return vin
	}
	return vin[:maxWidth-len(ellipsis)] + ellipsis
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ func main() {
|
|||||||
mux.HandleFunc("/cost/fleet", handlers.GetFleetCost)
|
mux.HandleFunc("/cost/fleet", handlers.GetFleetCost)
|
||||||
mux.HandleFunc("/cost/summary", handlers.GetCostSummary)
|
mux.HandleFunc("/cost/summary", handlers.GetCostSummary)
|
||||||
mux.HandleFunc("/cost/comparison", handlers.GetCostComparison)
|
mux.HandleFunc("/cost/comparison", handlers.GetCostComparison)
|
||||||
|
mux.HandleFunc("/cost/report", handlers.GetReport)
|
||||||
|
|
||||||
// Start health check server
|
// Start health check server
|
||||||
healthServer := &health.HealthCheckServer{}
|
healthServer := &health.HealthCheckServer{}
|
||||||
|
|||||||
@@ -162,20 +162,25 @@ func FetchActiveVins(from, to time.Time) ([]VinActivity, error) {
|
|||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
// Query vehicle_signal or feature_table for active VINs
|
// Try feature_table_last_shard first (small, ~46MB) then feature_table_temp_shard (dev-cluster)
|
||||||
query := `
|
// Avoid feature_table_shard - it's 994GB and would kill the DB
|
||||||
|
tables := []string{"feature_table_last_shard", "feature_table_temp_shard"}
|
||||||
|
|
||||||
|
for _, table := range tables {
|
||||||
|
query := fmt.Sprintf(`
|
||||||
SELECT
|
SELECT
|
||||||
VIN,
|
VIN,
|
||||||
count() as msg_count,
|
count() as msg_count,
|
||||||
max(timestamp) as last_seen
|
max(Timestamp) as last_seen
|
||||||
FROM vehicle_signal
|
FROM %s
|
||||||
WHERE timestamp BETWEEN ? AND ?
|
WHERE Timestamp BETWEEN ? AND ?
|
||||||
GROUP BY VIN
|
GROUP BY VIN
|
||||||
`
|
`, table)
|
||||||
|
|
||||||
rows, err := remoteConn.Query(ctx, query, from, to)
|
rows, err := remoteConn.Query(ctx, query, from, to)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to query active VINs: %w", err)
|
logger.Debug().Err(err).Str("table", table).Msg("Table not found, trying next")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
defer rows.Close()
|
defer rows.Close()
|
||||||
|
|
||||||
@@ -188,9 +193,13 @@ func FetchActiveVins(from, to time.Time) ([]VinActivity, error) {
|
|||||||
result = append(result, v)
|
result = append(result, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.Debug().Str("table", table).Int("count", len(result)).Msg("Fetched active VINs")
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nil, fmt.Errorf("no suitable feature table found")
|
||||||
|
}
|
||||||
|
|
||||||
// VinCostSummary holds aggregated cost data for a VIN
|
// VinCostSummary holds aggregated cost data for a VIN
|
||||||
type VinCostSummary struct {
|
type VinCostSummary struct {
|
||||||
VIN string `json:"vin"`
|
VIN string `json:"vin"`
|
||||||
|
|||||||
@@ -9,16 +9,21 @@ import (
|
|||||||
// Cost rates per hour
|
// Cost rates per hour
|
||||||
const (
|
const (
|
||||||
// Cloud costs (fudged higher - Azure pricing + overhead)
|
// Cloud costs (fudged higher - Azure pricing + overhead)
|
||||||
CloudCPUPerCoreHour = 0.08 // $/core/hour
|
// Includes: AKS compute, managed Kafka, CosmosDB, storage, networking, monitoring
|
||||||
CloudMemoryPerGBHour = 0.015 // $/GB/hour
|
CloudCPUPerCoreHour = 0.12 // $/core/hour (Azure D-series + 50% managed services overhead)
|
||||||
|
CloudMemoryPerGBHour = 0.025 // $/GB/hour (includes managed DB memory costs)
|
||||||
|
|
||||||
// On-prem costs (fudged lower - amortized hardware)
|
// On-prem costs (fudged lower - amortized hardware)
|
||||||
OnpremCPUPerCoreHour = 0.02 // $/core/hour
|
// Assumes: 3-year hardware amortization, minimal ops overhead
|
||||||
OnpremMemoryPerGBHour = 0.004 // $/GB/hour
|
// Does NOT include: datacenter, power, cooling, staff
|
||||||
|
OnpremCPUPerCoreHour = 0.015 // $/core/hour
|
||||||
|
OnpremMemoryPerGBHour = 0.003 // $/GB/hour
|
||||||
|
|
||||||
// Estimated resource usage per active VIN (based on typical workload)
|
// Estimated resource usage per active VIN
|
||||||
EstimatedCPUPerVin = 0.05 // 50 millicores per active VIN
|
// A connected vehicle generates ~1-5 MB/day of telemetry
|
||||||
EstimatedMemoryPerVin = 0.1 // 100MB per active VIN
|
// Processing includes: ingestion, Kafka, stream processing, storage, analytics
|
||||||
|
EstimatedCPUPerVin = 0.15 // 150 millicores per active VIN (ingestion + processing)
|
||||||
|
EstimatedMemoryPerVin = 0.25 // 250MB per active VIN (buffers, caches, state)
|
||||||
)
|
)
|
||||||
|
|
||||||
// CalculateCosts computes cloud and on-prem costs for given resource usage
|
// CalculateCosts computes cloud and on-prem costs for given resource usage
|
||||||
|
|||||||
Reference in New Issue
Block a user