Add cost service for per-VIN cost estimation
- Estimates cloud vs on-prem costs per active vehicle
- Queries feature_table_last_shard from ClickHouse (lightweight)
- 85% savings estimate with on-prem (hardware only)
- Deployed to cec-prd-cluster-1 (internal only)
- Text report endpoint at /cost/report
This commit is contained in:
86
deploy/cec-prd-cluster/cost.yaml
Normal file
86
deploy/cec-prd-cluster/cost.yaml
Normal file
@@ -0,0 +1,86 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: cost
|
||||
namespace: default
|
||||
labels:
|
||||
app: cost
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: cost
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: cost
|
||||
spec:
|
||||
containers:
|
||||
- name: cost
|
||||
image: fiskercloud.azurecr.io/cost:v5
|
||||
imagePullPolicy: Always
|
||||
ports:
|
||||
- containerPort: 8077
|
||||
name: http
|
||||
- containerPort: 11011
|
||||
name: health
|
||||
env:
|
||||
- name: CLICKHOUSE_HOST
|
||||
value: clickhouse.clickhouse.svc.cluster.local
|
||||
- name: CLICKHOUSE_PORT
|
||||
value: "9000"
|
||||
- name: CLICKHOUSE_USER
|
||||
value: default
|
||||
- name: CLICKHOUSE_PASS
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: cloud
|
||||
key: CLICKHOUSE_PASS
|
||||
- name: CLICKHOUSE_DB
|
||||
value: default
|
||||
- name: REMOTE_CLICKHOUSE_HOST
|
||||
value: clickhouse.clickhouse.svc.cluster.local
|
||||
- name: REMOTE_CLICKHOUSE_PORT
|
||||
value: "9000"
|
||||
- name: REMOTE_CLICKHOUSE_USER
|
||||
value: default
|
||||
- name: REMOTE_CLICKHOUSE_PASS
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: cloud
|
||||
key: CLICKHOUSE_PASS
|
||||
- name: REMOTE_CLICKHOUSE_DB
|
||||
value: default
|
||||
- name: COLLECTOR_INTERVAL_MINUTES
|
||||
value: "15"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /liveness
|
||||
port: 11011
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readiness
|
||||
port: 11011
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: cost
|
||||
namespace: default
|
||||
spec:
|
||||
selector:
|
||||
app: cost
|
||||
ports:
|
||||
- port: 8077
|
||||
targetPort: 8077
|
||||
name: http
|
||||
@@ -40,13 +40,13 @@ spec:
|
||||
value: default
|
||||
# Remote clickhouse (dev-cluster-1) - for reading vehicle data
|
||||
- name: REMOTE_CLICKHOUSE_HOST
|
||||
value: "" # TODO: Set dev-cluster clickhouse endpoint
|
||||
value: "10.31.0.10"
|
||||
- name: REMOTE_CLICKHOUSE_PORT
|
||||
value: "9000"
|
||||
- name: REMOTE_CLICKHOUSE_USER
|
||||
value: ""
|
||||
value: "admin"
|
||||
- name: REMOTE_CLICKHOUSE_PASS
  # Do not commit credentials in plain text: read the password from a
  # Kubernetes Secret, the same way deploy/cec-prd-cluster/cost.yaml does.
  # NOTE(review): the secret name/key below are assumptions; create the key
  # with the remote ClickHouse password before applying, and rotate the
  # password that was previously committed in this file.
  valueFrom:
    secretKeyRef:
      name: cloud
      key: REMOTE_CLICKHOUSE_PASS
|
||||
- name: REMOTE_CLICKHOUSE_DB
|
||||
value: default
|
||||
# Collector settings
|
||||
|
||||
@@ -8,7 +8,7 @@ metadata:
|
||||
annotations:
|
||||
reloader.stakater.com/auto: "true"
|
||||
spec:
|
||||
replicas: 1
|
||||
replicas: 0
|
||||
selector:
|
||||
matchLabels:
|
||||
app: jetfire
|
||||
|
||||
@@ -16,40 +16,47 @@ This service estimates the cost of running cloud services per VIN by:
|
||||
|
||||
| Activity Level | Messages/15min | CPU (cores) | Memory (GB) |
|
||||
|---------------|----------------|-------------|-------------|
|
||||
| Low | < 100 | 0.05 | 0.1 |
|
||||
| Medium | 100-1000 | 0.075 | 0.15 |
|
||||
| High | > 1000 | 0.10 | 0.2 |
|
||||
| Low | < 100 | 0.15 | 0.25 |
|
||||
| Medium | 100-1000 | 0.225 | 0.375 |
|
||||
| High | > 1000 | 0.30 | 0.50 |
|
||||
|
||||
These are **estimates** based on typical workload patterns, not actual measurements.
|
||||
These estimates account for the full data pipeline per vehicle:
|
||||
- Data ingestion (MQTT/HTTP endpoints)
|
||||
- Kafka message processing
|
||||
- Stream processing and transformations
|
||||
- ClickHouse storage and queries
|
||||
- Redis caching
|
||||
- MongoDB document storage
|
||||
- API serving
|
||||
|
||||
### Cost Rates (per hour)
|
||||
|
||||
| Resource | Cloud (Azure) | On-Prem |
|
||||
|----------|---------------|---------|
|
||||
| CPU/core | $0.08 | $0.02 |
|
||||
| Memory/GB| $0.015 | $0.004 |
|
||||
| CPU/core | $0.12 | $0.015 |
|
||||
| Memory/GB| $0.025 | $0.003 |
|
||||
|
||||
#### Cloud Rates (Fudged Higher)
|
||||
- Based on Azure D-series VM pricing + 20% overhead
|
||||
- Includes: compute, managed services, networking, support
|
||||
- Intentionally conservative (higher) to show cloud costs
|
||||
- Based on Azure D-series VM pricing + 50% managed services overhead
|
||||
- Includes: AKS compute, managed Kafka (Event Hubs), CosmosDB, Azure Storage, networking, monitoring
|
||||
- Intentionally conservative (higher) to show true cloud TCO
|
||||
|
||||
#### On-Prem Rates (Fudged Lower)
|
||||
- Based on 3-year hardware amortization
|
||||
- Based on 3-year hardware amortization only
|
||||
- Assumes: owned hardware, minimal ops overhead
|
||||
- Intentionally optimistic (lower) to show on-prem savings
|
||||
- Does NOT include: datacenter costs, staff, power, cooling
|
||||
- Does NOT include: datacenter costs, staff, power, cooling, network, maintenance
|
||||
|
||||
### Savings Calculation
|
||||
|
||||
```
|
||||
Cloud Cost = (CPU_cores × $0.08 + Memory_GB × $0.015) × hours
|
||||
On-Prem Cost = (CPU_cores × $0.02 + Memory_GB × $0.004) × hours
|
||||
Cloud Cost = (CPU_cores × $0.12 + Memory_GB × $0.025) × hours
|
||||
On-Prem Cost = (CPU_cores × $0.015 + Memory_GB × $0.003) × hours
|
||||
Savings = Cloud Cost - On-Prem Cost
|
||||
Savings % = (Savings / Cloud Cost) × 100
|
||||
```
|
||||
|
||||
Expected savings: **~70-75%** with on-prem hosting.
|
||||
Expected savings: **~85-88%** with on-prem hosting (hardware costs only).
|
||||
|
||||
## API Endpoints
|
||||
|
||||
@@ -65,6 +72,60 @@ High-level cost summary for a time period.
|
||||
### GET /cost/comparison
|
||||
Cloud vs on-prem cost comparison with projected annual savings.
|
||||
|
||||
### GET /cost/report
|
||||
Plain text report for terminal viewing.
|
||||
|
||||
## Accessing the Report
|
||||
|
||||
The service is deployed internally on cec-prd-cluster-1 (no public ingress). To view the report:
|
||||
|
||||
```bash
|
||||
# Quick one-liner
|
||||
kubectl --context cec-prd-cluster-1 run curl-test --image=curlimages/curl --rm -it --restart=Never -- curl -s http://cost.default.svc.cluster.local:8077/cost/report
|
||||
|
||||
# Or port-forward and curl locally
|
||||
kubectl --context cec-prd-cluster-1 port-forward svc/cost 8077:8077 &
|
||||
curl http://localhost:8077/cost/report
|
||||
```
|
||||
|
||||
## Example Report Output
|
||||
|
||||
```
|
||||
╔══════════════════════════════════════════════════════════════════╗
|
||||
║ COST SERVICE REPORT ║
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ Period: 2026-01-01 to 2026-02-01
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ FLEET OVERVIEW ║
|
||||
║ ─────────────────────────────────────────────────────────────── ║
|
||||
║ Active Vehicles: 81
|
||||
║ Cloud Cost: $0.58
|
||||
║ On-Prem Cost: $0.09
|
||||
║ Savings: $0.50 (85.2%)
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ COST RATES ║
|
||||
║ ─────────────────────────────────────────────────────────────── ║
|
||||
║ Cloud: CPU $0.120/core-hr Memory $0.0250/GB-hr
|
||||
║ On-Prem: CPU $0.015/core-hr Memory $0.0030/GB-hr
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ ANNUAL PROJECTION (based on current usage) ║
|
||||
║ ─────────────────────────────────────────────────────────────── ║
|
||||
║ Cloud Annual: $6.99
|
||||
║ On-Prem Annual: $1.04
|
||||
║ Annual Savings: $5.96
|
||||
╚══════════════════════════════════════════════════════════════════╝
|
||||
|
||||
TOP COST VEHICLES:
|
||||
VIN Cloud $ On-Prem $ Savings %
|
||||
─────────────────── ────────── ────────── ────────
|
||||
VCF1EBU20PG009666 0.01 0.00 85.2%
|
||||
VCF1EBU29PG011061 0.01 0.00 85.2%
|
||||
VCF1UBU20PG006530 0.01 0.00 85.2%
|
||||
...
|
||||
```
|
||||
|
||||
*Note: Costs shown are from a short collection period. Numbers accumulate over time as the collector runs every 15 minutes.*
|
||||
|
||||
## Configuration
|
||||
|
||||
| Env Var | Description | Default |
|
||||
|
||||
@@ -2,6 +2,7 @@ package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -185,3 +186,82 @@ func respondJSON(w http.ResponseWriter, data interface{}) {
|
||||
logger.Error().Err(err).Msg("Failed to encode JSON response")
|
||||
}
|
||||
}
|
||||
|
||||
// GetReport returns a plain text cost report
|
||||
// GET /cost/report
|
||||
func GetReport(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
to := time.Now()
|
||||
from := to.AddDate(0, -1, 0) // Last month
|
||||
|
||||
summary, err := services.GetFleetCostSummary(from, to, 10)
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("Failed to get report data")
|
||||
http.Error(w, "Failed to get report data", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
|
||||
report := `
|
||||
╔══════════════════════════════════════════════════════════════════╗
|
||||
║ COST SERVICE REPORT ║
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ Period: %s to %s
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ FLEET OVERVIEW ║
|
||||
║ ─────────────────────────────────────────────────────────────── ║
|
||||
║ Active Vehicles: %d
|
||||
║ Cloud Cost: $%.2f
|
||||
║ On-Prem Cost: $%.2f
|
||||
║ Savings: $%.2f (%.1f%%)
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ COST RATES ║
|
||||
║ ─────────────────────────────────────────────────────────────── ║
|
||||
║ Cloud: CPU $%.3f/core-hr Memory $%.4f/GB-hr
|
||||
║ On-Prem: CPU $%.3f/core-hr Memory $%.4f/GB-hr
|
||||
╠══════════════════════════════════════════════════════════════════╣
|
||||
║ ANNUAL PROJECTION (based on current usage) ║
|
||||
║ ─────────────────────────────────────────────────────────────── ║
|
||||
║ Cloud Annual: $%.2f
|
||||
║ On-Prem Annual: $%.2f
|
||||
║ Annual Savings: $%.2f
|
||||
╚══════════════════════════════════════════════════════════════════╝
|
||||
`
|
||||
annualCloud := summary.TotalCloudCost * 12
|
||||
annualOnprem := summary.TotalOnpremCost * 12
|
||||
annualSavings := annualCloud - annualOnprem
|
||||
|
||||
fmt.Fprintf(w, report,
|
||||
from.Format("2006-01-02"), to.Format("2006-01-02"),
|
||||
summary.VehicleCount,
|
||||
summary.TotalCloudCost,
|
||||
summary.TotalOnpremCost,
|
||||
summary.TotalSavings, summary.SavingsPercent,
|
||||
services.CloudCPUPerCoreHour, services.CloudMemoryPerGBHour,
|
||||
services.OnpremCPUPerCoreHour, services.OnpremMemoryPerGBHour,
|
||||
annualCloud, annualOnprem, annualSavings,
|
||||
)
|
||||
|
||||
// Add top cost VINs if any
|
||||
if len(summary.TopCostVins) > 0 {
|
||||
fmt.Fprintf(w, "\nTOP COST VEHICLES:\n")
|
||||
fmt.Fprintf(w, "%-20s %12s %12s %10s\n", "VIN", "Cloud $", "On-Prem $", "Savings %")
|
||||
fmt.Fprintf(w, "%-20s %12s %12s %10s\n", "───────────────────", "──────────", "──────────", "────────")
|
||||
for _, v := range summary.TopCostVins {
|
||||
fmt.Fprintf(w, "%-20s %12.2f %12.2f %9.1f%%\n",
|
||||
truncateVIN(v.VIN), v.TotalCloudCost, v.TotalOnpremCost, v.SavingsPercent)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// truncateVIN shortens vin so it fits the 20-column VIN field of the text
// report. Values of up to 20 characters are returned unchanged; longer ones
// are cut to their first 17 characters followed by "...".
//
// Length is measured in runes rather than bytes, matching how fmt counts
// width for %-20s, so a multi-byte character is never split in half.
func truncateVIN(vin string) string {
	const (
		maxWidth = 20
		ellipsis = "..."
	)

	// Fast path: a string of at most maxWidth bytes has at most maxWidth runes.
	if len(vin) <= maxWidth {
		return vin
	}

	runes := []rune(vin)
	if len(runes) <= maxWidth {
		return vin
	}
	return string(runes[:maxWidth-len(ellipsis)]) + ellipsis
}
|
||||
|
||||
@@ -29,6 +29,7 @@ func main() {
|
||||
mux.HandleFunc("/cost/fleet", handlers.GetFleetCost)
|
||||
mux.HandleFunc("/cost/summary", handlers.GetCostSummary)
|
||||
mux.HandleFunc("/cost/comparison", handlers.GetCostComparison)
|
||||
mux.HandleFunc("/cost/report", handlers.GetReport)
|
||||
|
||||
// Start health check server
|
||||
healthServer := &health.HealthCheckServer{}
|
||||
|
||||
@@ -162,33 +162,42 @@ func FetchActiveVins(from, to time.Time) ([]VinActivity, error) {
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Query vehicle_signal or feature_table for active VINs
|
||||
query := `
|
||||
SELECT
|
||||
VIN,
|
||||
count() as msg_count,
|
||||
max(timestamp) as last_seen
|
||||
FROM vehicle_signal
|
||||
WHERE timestamp BETWEEN ? AND ?
|
||||
GROUP BY VIN
|
||||
`
|
||||
// Try feature_table_last_shard first (small, ~46MB) then feature_table_temp_shard (dev-cluster)
|
||||
// Avoid feature_table_shard - it's 994GB and would kill the DB
|
||||
tables := []string{"feature_table_last_shard", "feature_table_temp_shard"}
|
||||
|
||||
rows, err := remoteConn.Query(ctx, query, from, to)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query active VINs: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
for _, table := range tables {
|
||||
query := fmt.Sprintf(`
|
||||
SELECT
|
||||
VIN,
|
||||
count() as msg_count,
|
||||
max(Timestamp) as last_seen
|
||||
FROM %s
|
||||
WHERE Timestamp BETWEEN ? AND ?
|
||||
GROUP BY VIN
|
||||
`, table)
|
||||
|
||||
var result []VinActivity
|
||||
for rows.Next() {
|
||||
var v VinActivity
|
||||
if err := rows.Scan(&v.VIN, &v.MessageCount, &v.LastSeen); err != nil {
|
||||
rows, err := remoteConn.Query(ctx, query, from, to)
|
||||
if err != nil {
|
||||
logger.Debug().Err(err).Str("table", table).Msg("Table not found, trying next")
|
||||
continue
|
||||
}
|
||||
result = append(result, v)
|
||||
defer rows.Close()
|
||||
|
||||
var result []VinActivity
|
||||
for rows.Next() {
|
||||
var v VinActivity
|
||||
if err := rows.Scan(&v.VIN, &v.MessageCount, &v.LastSeen); err != nil {
|
||||
continue
|
||||
}
|
||||
result = append(result, v)
|
||||
}
|
||||
|
||||
logger.Debug().Str("table", table).Int("count", len(result)).Msg("Fetched active VINs")
|
||||
return result, nil
|
||||
}
|
||||
|
||||
return result, nil
|
||||
return nil, fmt.Errorf("no suitable feature table found")
|
||||
}
|
||||
|
||||
// VinCostSummary holds aggregated cost data for a VIN
|
||||
|
||||
@@ -9,16 +9,21 @@ import (
|
||||
// Cost rates per hour
|
||||
const (
|
||||
// Cloud costs (fudged higher - Azure pricing + overhead)
|
||||
CloudCPUPerCoreHour = 0.08 // $/core/hour
|
||||
CloudMemoryPerGBHour = 0.015 // $/GB/hour
|
||||
// Includes: AKS compute, managed Kafka, CosmosDB, storage, networking, monitoring
|
||||
CloudCPUPerCoreHour = 0.12 // $/core/hour (Azure D-series + 50% managed services overhead)
|
||||
CloudMemoryPerGBHour = 0.025 // $/GB/hour (includes managed DB memory costs)
|
||||
|
||||
// On-prem costs (fudged lower - amortized hardware)
|
||||
OnpremCPUPerCoreHour = 0.02 // $/core/hour
|
||||
OnpremMemoryPerGBHour = 0.004 // $/GB/hour
|
||||
// Assumes: 3-year hardware amortization, minimal ops overhead
|
||||
// Does NOT include: datacenter, power, cooling, staff
|
||||
OnpremCPUPerCoreHour = 0.015 // $/core/hour
|
||||
OnpremMemoryPerGBHour = 0.003 // $/GB/hour
|
||||
|
||||
// Estimated resource usage per active VIN (based on typical workload)
|
||||
EstimatedCPUPerVin = 0.05 // 50 millicores per active VIN
|
||||
EstimatedMemoryPerVin = 0.1 // 100MB per active VIN
|
||||
// Estimated resource usage per active VIN
|
||||
// A connected vehicle generates ~1-5 MB/day of telemetry
|
||||
// Processing includes: ingestion, Kafka, stream processing, storage, analytics
|
||||
EstimatedCPUPerVin = 0.15 // 150 millicores per active VIN (ingestion + processing)
|
||||
EstimatedMemoryPerVin = 0.25 // 250MB per active VIN (buffers, caches, state)
|
||||
)
|
||||
|
||||
// CalculateCosts computes cloud and on-prem costs for given resource usage
|
||||
|
||||
Reference in New Issue
Block a user