Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ require (
github.com/google/go-cmp v0.7.0
github.com/google/uuid v1.6.0
github.com/jarcoal/httpmock v1.4.0
github.com/nebius/gosdk v0.0.0-20250826102719-940ad1dfb5de
github.com/nebius/gosdk v0.2.22
github.com/pkg/errors v0.9.1
github.com/sfcompute/nodes-go v0.1.0-alpha.4
github.com/stretchr/testify v1.11.1
Expand All @@ -35,7 +35,7 @@ require (
)

require (
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.8-20250717185734-6c6e0d3c608e.1 // indirect
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.9-20250912141014-52f32327d4b0.1 // indirect
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.9 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.9 // indirect
Expand Down Expand Up @@ -94,7 +94,6 @@ require (
go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect
go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b // indirect
golang.org/x/net v0.53.0 // indirect
golang.org/x/oauth2 v0.34.0 // indirect
golang.org/x/sync v0.20.0 // indirect
Expand Down
10 changes: 4 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.8-20250717185734-6c6e0d3c608e.1 h1:sjY1k5uszbIZfv11HO2keV4SLhNA47SabPO886v7Rvo=
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.8-20250717185734-6c6e0d3c608e.1/go.mod h1:8EQ5GzyGJQ5tEIwMSxCl8RKJYsjCpAwkdcENoioXT6g=
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.9-20250912141014-52f32327d4b0.1 h1:DQLS/rRxLHuugVzjJU5AvOwD57pdFl9he/0O7e5P294=
buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.9-20250912141014-52f32327d4b0.1/go.mod h1:aY3zbkNan5F+cGm9lITDP6oxJIwu0dn9KjJuJjWaHkg=
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0=
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
Expand Down Expand Up @@ -139,8 +139,8 @@ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFd
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/nebius/gosdk v0.0.0-20250826102719-940ad1dfb5de h1:7GbDUDyH22dvN7ata8HuNVuDlcyaDzUs/s+03Y3pDqU=
github.com/nebius/gosdk v0.0.0-20250826102719-940ad1dfb5de/go.mod h1:eVbm4Qc4GPzBn3EL4rLvy1WS9zqJDw+giksOA2NZERY=
github.com/nebius/gosdk v0.2.22 h1:1TTi1NAGlPSpT6mB6WFn9ZQjkPseBiL9ZhLPCNn7+/4=
github.com/nebius/gosdk v0.2.22/go.mod h1:D0dqxPszWitB2rceWKF2Ty1uNunJ8YiYD3dLEdYBmOI=
github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM=
github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4=
Expand Down Expand Up @@ -219,8 +219,6 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b h1:DXr+pvt3nC887026GRP39Ej11UATqWDmWuS99x26cD0=
golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
Expand Down
6 changes: 3 additions & 3 deletions v1/providers/nebius/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func findProjectForRegion(ctx context.Context, sdk *gosdk.SDK, tenantID, region
pageSize := int64(1000)
projectsResp, err := sdk.Services().IAM().V1().Project().List(ctx, &nebiusiamv1.ListProjectsRequest{
ParentId: tenantID,
PageSize: &pageSize,
PageSize: pageSize,
})
if err != nil {
return "", errors.WrapAndTrace(err)
Expand Down Expand Up @@ -183,7 +183,7 @@ func (c *NebiusClient) discoverAllProjects(ctx context.Context) ([]string, error
pageSize := int64(1000)
projectsResp, err := c.sdk.Services().IAM().V1().Project().List(ctx, &nebiusiamv1.ListProjectsRequest{
ParentId: c.tenantID,
PageSize: &pageSize,
PageSize: pageSize,
})
if err != nil {
return nil, fmt.Errorf("failed to list projects: %w", err)
Expand All @@ -209,7 +209,7 @@ func (c *NebiusClient) discoverAllProjectsWithRegions(ctx context.Context) (map[
pageSize := int64(1000)
projectsResp, err := c.sdk.Services().IAM().V1().Project().List(ctx, &nebiusiamv1.ListProjectsRequest{
ParentId: c.tenantID,
PageSize: &pageSize,
PageSize: pageSize,
})
if err != nil {
return nil, fmt.Errorf("failed to list projects: %w", err)
Expand Down
4 changes: 2 additions & 2 deletions v1/providers/nebius/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ func (c *NebiusClient) getDefaultImages(ctx context.Context) ([]v1.Image, error)

// getImageDescription extracts description from ImageSpec if available
func getImageDescription(image *compute.Image) string {
if image.Spec != nil && image.Spec.Description != nil {
return *image.Spec.Description
if image.Spec != nil {
return image.Spec.Description
}
return ""
}
Expand Down
133 changes: 124 additions & 9 deletions v1/providers/nebius/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/brevdev/cloud/internal/errors"
v1 "github.com/brevdev/cloud/v1"
billing "github.com/nebius/gosdk/proto/nebius/billing/v1alpha1"
capacityv1 "github.com/nebius/gosdk/proto/nebius/capacity/v1"
common "github.com/nebius/gosdk/proto/nebius/common/v1"
compute "github.com/nebius/gosdk/proto/nebius/compute/v1"
quotas "github.com/nebius/gosdk/proto/nebius/quotas/v1"
Expand Down Expand Up @@ -65,11 +66,18 @@ func (c *NebiusClient) GetInstanceTypes(ctx context.Context, args v1.GetInstance
quotaMap = make(map[string]*quotas.QuotaAllowance)
}

capacityAdviceMap, capacityErr := c.getResourceAdviceMap(ctx)
if capacityErr != nil {
c.logger.Warn(ctx, "failed to fetch capacity advisor data, falling back to quota-only availability",
v1.LogField("error", capacityErr.Error()))
capacityAdviceMap = nil
}

var instanceTypes []v1.InstanceType

// For each location, get instance types with availability/quota info
for _, location := range locations {
locationInstanceTypes, err := c.getInstanceTypesForLocation(ctx, platformsResp, location, args, quotaMap)
locationInstanceTypes, err := c.getInstanceTypesForLocation(ctx, platformsResp, location, args, quotaMap, capacityAdviceMap)
if err != nil {
continue // Skip failed locations
}
Expand Down Expand Up @@ -108,8 +116,8 @@ func (c *NebiusClient) GetInstanceTypeQuotas(_ context.Context, _ v1.GetInstance

// getInstanceTypesForLocation gets instance types for a specific location with quota/availability checking
//
//nolint:gocognit,unparam // Complex function iterating platforms, presets, and quota checks
func (c *NebiusClient) getInstanceTypesForLocation(ctx context.Context, platformsResp *compute.ListPlatformsResponse, location v1.Location, _ v1.GetInstanceTypeArgs, quotaMap map[string]*quotas.QuotaAllowance) ([]v1.InstanceType, error) {
//nolint:unparam // error return kept for consistency with other provider helpers
func (c *NebiusClient) getInstanceTypesForLocation(ctx context.Context, platformsResp *compute.ListPlatformsResponse, location v1.Location, _ v1.GetInstanceTypeArgs, quotaMap map[string]*quotas.QuotaAllowance, capacityAdviceMap map[string]uint32) ([]v1.InstanceType, error) {
var instanceTypes []v1.InstanceType

for _, platform := range platformsResp.GetItems() {
Expand Down Expand Up @@ -145,13 +153,15 @@ func (c *NebiusClient) getInstanceTypesForLocation(ctx context.Context, platform
// Determine GPU type and details from platform name
gpuType, gpuName := extractGPUTypeAndName(platform.Metadata.Name)

// Check quota/availability for this instance type in this location
isAvailable := c.checkPresetQuotaAvailability(preset.Resources, location.Name, platform.Metadata.Name, quotaMap)
// Check quota for this instance type in this location. GPU availability requires both
// Capacity Advisor stock (when present) and remaining tenant quota.
hasQuota := c.checkPresetQuotaAvailability(preset.Resources, location.Name, platform.Metadata.Name, quotaMap)

// Skip instance types with no quota at all
if !isAvailable {
continue
}
isAvailable := c.resolvePresetAvailability(
ctx, isCPUOnly, hasQuota,
location.Name, platform.Metadata.Name, preset.Name,
capacityAdviceMap,
)

// Increment CPU preset counter if this is a CPU platform
if isCPUOnly {
Expand Down Expand Up @@ -250,6 +260,111 @@ func (c *NebiusClient) getQuotaMap(ctx context.Context) (map[string]*quotas.Quot
return quotaMap, nil
}

// capacityAdviceKey returns the lookup key for a single preset of a platform
// in a region: the three names joined with ':' in region, platform, preset order.
func capacityAdviceKey(region, platform, preset string) string {
	// Sprint inserts no separators between string operands, so the result is
	// exactly the three names with a literal ':' between them.
	return fmt.Sprint(region, ":", platform, ":", preset)
}

// resolvePresetAvailability decides whether a preset is reported as available.
//
// CPU-only presets, and every preset when capacityAdviceMap is nil, are decided
// by hasQuota alone. Otherwise the map is consulted under the key built from
// locationName, platformName and presetName:
//   - key missing: a debug message is logged and hasQuota is returned;
//   - key present: the result is true only when the advised count is positive
//     and hasQuota is true.
func (c *NebiusClient) resolvePresetAvailability(
	ctx context.Context,
	isCPUOnly bool,
	hasQuota bool,
	locationName, platformName, presetName string,
	capacityAdviceMap map[string]uint32,
) bool {
	useAdvice := !isCPUOnly && capacityAdviceMap != nil
	if !useAdvice {
		return hasQuota
	}

	lookupKey := capacityAdviceKey(locationName, platformName, presetName)
	if units, found := capacityAdviceMap[lookupKey]; found {
		return hasQuota && units > 0
	}

	// The advice data may not cover every preset/region; an absent key means
	// "unknown", not "unavailable", so quota alone decides.
	c.logger.Debug(ctx, "capacity advice key not found, falling back to quota availability",
		v1.LogField("capacityKey", lookupKey),
		v1.LogField("hasQuota", hasQuota))
	return hasQuota
}

// resourceAdviceEntry turns one ResourceAdvice item into a capacity map entry.
//
// ok is false (with an empty key and a zero count) when the item has no spec,
// the spec has no compute instance, or the compute instance has no preset.
// Otherwise key is capacityAdviceKey(region, platform, preset name) and
// available is the on-demand available count, forced to 0 when the on-demand
// data state is DATA_STATE_UNKNOWN or the availability level is
// AVAILABILITY_LEVEL_LIMIT_REACHED.
func resourceAdviceEntry(item *capacityv1.ResourceAdvice) (key string, available uint32, ok bool) {
	spec := item.GetSpec()
	if spec == nil {
		return "", 0, false
	}
	instance := spec.GetComputeInstance()
	if instance == nil {
		return "", 0, false
	}
	presetSpec := instance.GetPreset()
	if presetSpec == nil {
		return "", 0, false
	}

	onDemand := item.GetStatus().GetOnDemand()
	switch {
	case onDemand.GetDataState() == capacityv1.ResourceAdviceStatus_Availability_DATA_STATE_UNKNOWN,
		onDemand.GetAvailabilityLevel() == capacityv1.ResourceAdviceStatus_Availability_AVAILABILITY_LEVEL_LIMIT_REACHED:
		// Unknown data or an exhausted limit is reported as no capacity.
		available = 0
	default:
		available = onDemand.GetAvailable()
	}

	return capacityAdviceKey(spec.GetRegion(), instance.GetPlatform(), presetSpec.GetName()), available, true
}

// buildResourceAdviceMapFromItems aggregates ResourceAdvice items into a map
// from capacity key to available count. Items rejected by resourceAdviceEntry
// are skipped; when several items share a key, the largest count is kept.
// The returned map is never nil.
func buildResourceAdviceMapFromItems(items []*capacityv1.ResourceAdvice) map[string]uint32 {
	byKey := make(map[string]uint32)

	for i := range items {
		key, units, usable := resourceAdviceEntry(items[i])
		if usable {
			// A missing key reads as 0, the smallest uint32, so max both records
			// first-seen entries (zero counts included) and keeps the larger of
			// two counts for a repeated key.
			byKey[key] = max(byKey[key], units)
		}
	}

	return byKey
}

// getResourceAdviceMap lists resource advice for the client's tenant and builds
// a map from capacity key to available count.
//
// Each yielded item is converted with resourceAdviceEntry; items it rejects are
// skipped, accepted ones are logged at debug level, and the largest count is
// kept when a key repeats. The first error yielded by the listing aborts the
// build and is returned wrapped, with a nil map.
func (c *NebiusClient) getResourceAdviceMap(ctx context.Context) (map[string]uint32, error) {
	request := &capacityv1.ListResourceAdviceRequest{
		ParentId: c.tenantID,
	}
	advice := make(map[string]uint32)

	for item, iterErr := range c.sdk.Services().Capacity().V1().ResourceAdvice().Filter(ctx, request) {
		if iterErr != nil {
			return nil, errors.WrapAndTrace(iterErr)
		}

		key, available, ok := resourceAdviceEntry(item)
		if !ok {
			continue
		}

		spec := item.GetSpec()
		instance := spec.GetComputeInstance()
		c.logger.Debug(ctx, "capacity advice map entry",
			v1.LogField("capacityKey", key),
			v1.LogField("available", available),
			v1.LogField("region", spec.GetRegion()),
			v1.LogField("platform", instance.GetPlatform()),
			v1.LogField("preset", instance.GetPreset().GetName()))

		// A missing key reads as 0, so max stores first-seen entries and keeps
		// the larger count for a repeated key.
		advice[key] = max(advice[key], available)
	}

	c.logger.Debug(ctx, "built capacity advice map", v1.LogField("entryCount", len(advice)))

	return advice, nil
}

// checkPresetQuotaAvailability checks if a preset has available quota in the specified region
//
//nolint:gocyclo // Complex quota checking with multiple resource types
Expand Down
Loading
Loading