diff --git a/deploy/cec-prd-cluster/cost.yaml b/deploy/cec-prd-cluster/cost.yaml index af93b00..20cb9a9 100644 --- a/deploy/cec-prd-cluster/cost.yaml +++ b/deploy/cec-prd-cluster/cost.yaml @@ -17,7 +17,7 @@ spec: spec: containers: - name: cost - image: fiskercloud.azurecr.io/cost:v11 + image: fiskercloud.azurecr.io/cost:v12 imagePullPolicy: IfNotPresent ports: - containerPort: 8077 diff --git a/services/cost/README.md b/services/cost/README.md index b2a6638..aefdc56 100644 --- a/services/cost/README.md +++ b/services/cost/README.md @@ -24,18 +24,14 @@ Whether you have 100 vehicles or 100,000, you still need Kafka, databases, and g ### Platform Base Resources (Fixed) -What it takes to run the cloud services platform: +What it takes to run the cloud services platform (from migration plan v1.1): | Component | CPU (cores) | Memory (GB) | Notes | |-----------|-------------|-------------|-------| -| Kafka brokers | 32 | 128 | 3-node cluster | -| ClickHouse | 64 | 256 | 3 shards for HA | -| MongoDB | 16 | 128 | Replica set | -| Redis | 16 | 128 | Cluster mode | -| PostgreSQL | 32 | 128 | Primary + replicas | -| Gateway services | 8 | 64 | API gateway, auth | -| Monitoring/logging | 8 | 64 | Prometheus, Grafana, Loki | -| **Total Platform Base** | **176** | **896** | | +| VM 1-4: Core Services | 16 | 64 | Kafka, OTA, Valet, Auth/APIs | +| VM 5: Analytics Primary | 32 | 256 | ClickHouse, Ditto, Beacon, Jetfire | +| VM 6: Analytics Secondary | 32 | 256 | Optimus, Cargo, Vehicle Analytics | +| **Total Platform Base** | **80** | **544** | Based on migration plan | ### Per-VIN Resources (Marginal) @@ -51,19 +47,19 @@ Incremental resources needed for each additional connected vehicle: | Resource | Cloud (Azure) | On-Prem/Bare Metal | |----------|---------------|-------------------| -| CPU/core-hour | $0.30 | $0.02 | -| Memory/GB-hour | $0.08 | $0.005 | -| Managed Services/15min | $10.00 | $2.50 | +| CPU/core-hour | $0.35 | $0.015 | +| Memory/GB-hour | $0.10 | $0.004 | +| Managed Services/15min | $17.00 | $1.50 | -#### Why On-Prem is ~90% Cheaper -- **Platform base**: Same hardware, but cloud charges ~15x more for managed services -- **Per-VIN compute**: Cloud VMs cost ~15x more than amortized bare metal -- **Managed services**: Event Hubs, CosmosDB, etc. have significant markup vs self-hosted equivalents +#### Why On-Prem is ~90-95% Cheaper +- **Platform base**: Same workload, but cloud charges ~20x more for managed services +- **Per-VIN compute**: Cloud VMs cost ~20x more than amortized bare metal +- **Managed services**: Event Hubs, CosmosDB, Azure DB for PostgreSQL have significant markup vs self-hosted ### Savings Calculation ``` -Platform Base Cost = (176 cores × rate + 896 GB × rate) × hours +Platform Base Cost = (80 cores × rate + 544 GB × rate) × hours Per-VIN Cost = (0.05 cores × rate + 0.08 GB × rate) × hours × activity_multiplier Total Cost = Platform Base + (Per-VIN × VIN count) + Managed Services @@ -73,18 +69,17 @@ Savings = Cloud Cost - On-Prem Cost Savings % = (Savings / Cloud Cost) × 100 ``` -Expected savings: **~90%** with on-prem/bare metal hosting. +Expected savings: **~90-95%** with on-prem/bare metal hosting. -### Projected Annual Costs (5000 vehicles) +### Projected Annual Costs -Based on ~$100k/month cloud spend: +Based on ~$1M/year Azure spend (migration plan v1.1): | Metric | Cloud | On-Prem | |--------|-------|---------| -| Monthly Cost | ~$100,000 | ~$9,500 | -| Annual Cost | ~$1,200,000 | ~$114,000 | -| Per Vehicle/Month | ~$20.00 | ~$1.90 | -| Annual Savings | ~$1,086,000 (90%) | - | +| Monthly Cost | ~$83,000 | ~$7,000 | +| Annual Cost | ~$1,000,000 | ~$84,000 | +| Annual Savings | ~$916,000 (92%) | - | ## API Endpoints diff --git a/services/cost/cost b/services/cost/cost deleted file mode 100755 index 5be1540..0000000 Binary files a/services/cost/cost and /dev/null differ diff --git a/services/cost/services/collector.go b/services/cost/services/collector.go index 9ce8139..b5d83d4 100644 --- a/services/cost/services/collector.go +++ b/services/cost/services/collector.go @@ -8,16 +8,18 @@ import ( // Cost rates per hour const ( - // Cloud costs (based on ~$100k/month for ~5000 vehicles) - // Includes: AKS compute, Event Hubs (Kafka), CosmosDB, storage, networking, monitoring - CloudCPUPerCoreHour = 0.30 // $/core/hour (Azure D-series + managed services + support) - CloudMemoryPerGBHour = 0.08 // $/GB/hour (includes managed DB, Redis, caching layers) + // Cloud costs (based on ~$1M/year = ~$83k/month Azure spend) + // Includes: AKS compute, Event Hubs (Kafka), CosmosDB, Azure DB for PostgreSQL, + // Azure Cache for Redis, Blob Storage, networking, monitoring, support + CloudCPUPerCoreHour = 0.35 // $/core/hour (Azure D-series + managed services + support) + CloudMemoryPerGBHour = 0.10 // $/GB/hour (includes managed DB, Redis, caching layers) // On-prem/bare metal costs (amortized hardware only) + // Based on migration plan: 6 VMs, 80 cores, 544GB RAM, 17TB storage // Assumes: 3-year hardware amortization, minimal ops overhead // Does NOT include: datacenter, power, cooling, staff, network - OnpremCPUPerCoreHour = 0.02 // $/core/hour - OnpremMemoryPerGBHour = 0.005 // $/GB/hour + OnpremCPUPerCoreHour = 0.015 // $/core/hour (~95% cheaper than cloud) + OnpremMemoryPerGBHour = 0.004 // $/GB/hour // Per-VIN resource usage (marginal cost per vehicle) // This is the incremental CPU/RAM needed for each additional connected vehicle @@ -26,16 +28,18 @@ const ( PerVinMemoryGB = 0.08 // 80MB per VIN (marginal) // Platform base resources (fixed cost regardless of VIN count) - // This is the minimum infrastructure to run the cloud services platform: - // Kafka brokers, ClickHouse, MongoDB, Redis, PostgreSQL, gateway services, etc. - PlatformBaseCPUCores = 176.0 // 176 cores for base platform - PlatformBaseMemoryGB = 896.0 // 896GB RAM for base platform + // Based on migration plan production environment: + // VM1-4: 64GB RAM, 16 cores (Kafka, OTA, Valet, Auth/APIs) + // VM5-6: 512GB RAM, 64 cores (ClickHouse, Analytics, Vehicle Data) + // Total: 80 cores, 544GB RAM + PlatformBaseCPUCores = 80.0 // 80 cores for base platform (from migration plan) + PlatformBaseMemoryGB = 544.0 // 544GB RAM for base platform (from migration plan) // Base infrastructure cost per collection interval (managed services, storage, networking) - // Cloud: Event Hubs, CosmosDB, Azure Storage, Defender, monitoring (~$45k/month) - // On-prem: ~75% cheaper - just hardware amortization for equivalent capacity - BaseInfraCloudCost = 10.00 // $/15min for managed services (cloud) - BaseInfraOnpremCost = 2.50 // $/15min for equivalent on-prem (75% cheaper) + // Cloud: Event Hubs, CosmosDB, Azure Storage, Defender, monitoring (~$50k/month of $83k total) + // On-prem: ~90% cheaper - just hardware amortization + minimal hosting + BaseInfraCloudCost = 17.00 // $/15min for managed services (cloud) - ~$50k/month + BaseInfraOnpremCost = 1.50 // $/15min for equivalent on-prem (~91% cheaper) ) // CalculateCosts computes cloud and on-prem costs for given resource usage