Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ describe(projectRow.name, () => {
expect(result.maxNodeFreeCpu).toBe(4000n);
expect(result.maxNodeFreeMemory).toBe(8_000_000_000n);
expect(result.maxNodeFreeGpu).toBe(2n);
expect(result.gpuModels).toEqual(["nvidia/a100"]);
expect(result.gpuModels).toEqual(["nvidia", "nvidia/a100"]);
expect(result.storageClasses).toEqual(["beta2"]);
});

Expand Down Expand Up @@ -101,7 +101,7 @@ describe(projectRow.name, () => {
})
);

expect(result.gpuModels).toEqual(["amd/mi300x", "nvidia/a100"]);
expect(result.gpuModels).toEqual(["amd", "amd/mi300x", "nvidia", "nvidia/a100"]);
});

it("handles ephemeral-only storage", () => {
Expand Down Expand Up @@ -170,7 +170,7 @@ describe(projectRow.name, () => {

expect(result.maxNodeFreeGpu).toBe(5n);
expect(result.totalAvailableGpu).toBe(9n);
expect(result.gpuModels).toEqual(["nvidia/a100", "nvidia/h100"]);
expect(result.gpuModels).toEqual(["nvidia", "nvidia/a100", "nvidia/h100"]);
});
});

Expand Down
4 changes: 4 additions & 0 deletions apps/provider-inventory/src/lib/project-row/project-row.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ export function projectRow(cluster: ClusterState): ProjectedRow {
if (gpu.vendor && gpu.name) {
gpuModelSet.add(`${gpu.vendor}/${gpu.name}`);
}

if (gpu.vendor) {
gpuModelSet.add(gpu.vendor);
}
}

for (const cls of node.storageClasses) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { describe, expect, it } from "vitest";

import type { GroupSpecJSON } from "@src/lib/groupspec-mapper/groupspec-mapper";
import type { RequestedResourceUnit, RequestedStorage } from "@src/types/inventory.types";
import type { RequestedResourceUnit, RequestedStorage, ResourceAttribute } from "@src/types/inventory.types";
import { aggregateCriteria } from "./bid-screening.aggregator";

describe(aggregateCriteria.name, () => {
Expand Down Expand Up @@ -145,24 +145,62 @@ describe(aggregateCriteria.name, () => {
});

describe("units dimension", () => {
it("emits an empty per-unit filter slot for each unit in this slice (issues 2/3 populate it)", () => {
it("emits a per-unit filter slot for each unit in this slice", () => {
const c = aggregateCriteria([makeUnit({}), makeUnit({})], makeRequirements());
expect(c.units).toEqual([
{ gpuTokens: [], persistentClasses: [] },
{ gpuTokens: [], persistentClasses: [] }
]);
});

it("emits a vendor-only token when the GPU attribute has no model (wildcard)", () => {
const c = aggregateCriteria([makeUnit({ gpu: 1n, gpuAttributes: [{ key: "vendor/nvidia", value: "true" }] })], makeRequirements());
expect(c.units[0].gpuTokens).toEqual(["nvidia"]);
});

it("emits a vendor/model token when the GPU attribute specifies a model", () => {
const c = aggregateCriteria([makeUnit({ gpu: 1n, gpuAttributes: [{ key: "vendor/nvidia/model/a100", value: "true" }] })], makeRequirements());
expect(c.units[0].gpuTokens).toEqual(["nvidia/a100"]);
});

it("emits every OR-alternative token when a unit has multiple GPU attributes", () => {
const c = aggregateCriteria(
[
makeUnit({
gpu: 1n,
gpuAttributes: [
{ key: "vendor/nvidia/model/a100", value: "true" },
{ key: "vendor/amd/model/mi300x", value: "true" }
]
})
],
makeRequirements()
);
expect(c.units[0].gpuTokens).toEqual(["nvidia/a100", "amd/mi300x"]);
});

it("emits an empty gpuTokens array for units that do not request a GPU", () => {
const c = aggregateCriteria([makeUnit({ gpu: 0n })], makeRequirements());
expect(c.units[0].gpuTokens).toEqual([]);
});
});
});

function makeUnit(input: { cpu?: bigint; memory?: bigint; gpu?: bigint; count?: number; storage?: RequestedStorage[] }): RequestedResourceUnit {
function makeUnit(input: {
cpu?: bigint;
memory?: bigint;
gpu?: bigint;
count?: number;
storage?: RequestedStorage[];
gpuAttributes?: ResourceAttribute[];
}): RequestedResourceUnit {
return {
id: 1,
count: input.count ?? 1,
resources: {
cpu: { units: input.cpu ?? 0n, attributes: [] },
memory: { quantity: input.memory ?? 0n, attributes: [] },
gpu: { units: input.gpu ?? 0n, attributes: [] },
gpu: { units: input.gpu ?? 0n, attributes: input.gpuAttributes ?? [] },
storage: input.storage ?? []
}
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { parseGPUAttributes } from "@src/lib/gpu-attribute-parser/gpu-attribute-parser";
import type { GroupSpecJSON } from "@src/lib/groupspec-mapper/groupspec-mapper";
import { parseStorageAttributes } from "@src/lib/storage-attribute-parser/storage-attribute-parser";
import type { RequestedResourceUnit } from "@src/types/inventory.types";
import type { RequestedResourceUnit, ResourceAttribute } from "@src/types/inventory.types";

interface UnitFilters {
gpuTokens: string[];
Expand Down Expand Up @@ -57,7 +58,7 @@ export function aggregateCriteria(resourceUnits: RequestedResourceUnit[], requir
// ram volumes intentionally skipped — issue 4 will add them to totalMemory
}

units.push({ gpuTokens: [], persistentClasses: [] });
units.push({ gpuTokens: gpuTokensForUnit(unit.resources.gpu), persistentClasses: [] });
}

const attributes: BidScreeningCriteria["attributes"] = [];
Expand Down Expand Up @@ -96,3 +97,13 @@ export function aggregateCriteria(resourceUnits: RequestedResourceUnit[], requir
/**
 * Escapes every regular-expression metacharacter in `input` so the result can be
 * embedded in a pattern and matched literally.
 *
 * @param input - Raw text that may contain regex metacharacters.
 * @returns `input` with each metacharacter prefixed by a backslash.
 */
function escapeRegex(input: string): string {
  const metacharacters = /[\\.^$*+?()[\]{}|]/g;
  // "$&" is the replacement pattern for "the matched substring".
  const backslashPrefixed = "\\$&";
  return input.replace(metacharacters, backslashPrefixed);
}

/**
 * Builds the list of GPU filter tokens requested by a single resource unit.
 *
 * A unit requesting zero GPUs yields no tokens. Otherwise every parsed GPU attribute
 * becomes `vendor` when its model is the `"*"` wildcard, or `vendor/model` when a
 * concrete model is given. Duplicate tokens are dropped, keeping first-seen order.
 *
 * @param gpu - The unit's GPU resource: requested unit count plus its raw attributes.
 * @returns The distinct tokens in the order they were first produced.
 */
function gpuTokensForUnit(gpu: { units: bigint; attributes: ResourceAttribute[] }): string[] {
  if (gpu.units === 0n) {
    return [];
  }

  // A Set iterates in insertion order, so it de-duplicates without reordering.
  const uniqueTokens = new Set<string>();
  for (const { vendor, model } of parseGPUAttributes(gpu.attributes)) {
    uniqueTokens.add(model === "*" ? vendor : `${vendor}/${model}`);
  }
  return [...uniqueTokens];
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import type { GroupSpecJSON } from "@src/lib/groupspec-mapper/groupspec-mapper";
import { ResourcePair } from "@src/lib/resource-pair/resource-pair";
import { providerInventory } from "@src/model-schemas/provider-inventory/provider-inventory.schema";
import { DRIZZLE_DB } from "@src/providers/drizzle.provider";
import type { RequestedResourceUnit } from "@src/types/inventory.types";
import type { RequestedResourceUnit, ResourceAttribute } from "@src/types/inventory.types";
import { AUDITOR, BidScreeningRepository } from "./bid-screening.repository";

describe(BidScreeningRepository.name, () => {
Expand Down Expand Up @@ -146,6 +146,76 @@ describe(BidScreeningRepository.name, () => {
});
});

// Exercises findCandidates against seeded providers whose gpuModels arrays hold
// both bare vendor tokens ("nvidia") and vendor/model tokens ("nvidia/a100").
describe("gpu_models filter", () => {
it("vendor-only request matches mixed-model providers via the vendor token", async () => {
// Both providers advertise the bare "nvidia" token alongside different models.
await seed({ owner: "akash1nvidiaA100", gpuModels: ["nvidia", "nvidia/a100"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });
await seed({ owner: "akash1nvidiaH100", gpuModels: ["nvidia", "nvidia/h100"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });

const rows = await repository.findCandidates([unit({ gpu: 1n, gpuAttributes: [{ key: "vendor/nvidia", value: "true" }] })], requirements());

expect(owners(rows)).toEqual(["akash1nvidiaA100", "akash1nvidiaH100"]);
});

it("vendor-only request excludes wrong-vendor providers", async () => {
await seed({ owner: "akash1nvidia", gpuModels: ["nvidia", "nvidia/a100"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });
await seed({ owner: "akash1amd", gpuModels: ["amd", "amd/mi300x"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });

const rows = await repository.findCandidates([unit({ gpu: 1n, gpuAttributes: [{ key: "vendor/nvidia", value: "true" }] })], requirements());

expect(owners(rows)).toEqual(["akash1nvidia"]);
});

it("treats multiple GPU attributes on one unit as OR alternatives via overlap", async () => {
await seed({ owner: "akash1nvidia", gpuModels: ["nvidia", "nvidia/a100"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });
await seed({ owner: "akash1amd", gpuModels: ["amd", "amd/mi300x"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });
// The intel provider carries neither requested model and is expected to be filtered out.
await seed({ owner: "akash1intel", gpuModels: ["intel", "intel/gaudi3"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });

const rows = await repository.findCandidates(
[
unit({
gpu: 1n,
gpuAttributes: [
{ key: "vendor/nvidia/model/a100", value: "true" },
{ key: "vendor/amd/model/mi300x", value: "true" }
]
})
],
requirements()
);

expect(owners(rows)).toEqual(["akash1amd", "akash1nvidia"]);
});

it("emits a separate clause per non-empty unit and ANDs them, so providers must cover divergent GPU needs", async () => {
await seed({ owner: "akash1nvidiaOnly", gpuModels: ["nvidia", "nvidia/a100"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });
await seed({
owner: "akash1mixed",
gpuModels: ["nvidia", "nvidia/a100", "amd", "amd/mi300x"],
totalAvailableGpu: 8n,
maxNodeFreeGpu: 8n
});

// Two units, each with its own GPU attribute; only the provider listing both models is expected back.
const rows = await repository.findCandidates(
[
unit({ gpu: 1n, gpuAttributes: [{ key: "vendor/nvidia/model/a100", value: "true" }] }),
unit({ gpu: 1n, gpuAttributes: [{ key: "vendor/amd/model/mi300x", value: "true" }] })
],
requirements()
);

expect(owners(rows)).toEqual(["akash1mixed"]);
});

it("omits the clause for units without GPU requirements, so no-GPU providers stay in the result", async () => {
await seed({ owner: "akash1noGpu", gpuModels: [] });
await seed({ owner: "akash1withGpu", gpuModels: ["nvidia", "nvidia/a100"], totalAvailableGpu: 8n, maxNodeFreeGpu: 8n });

// unit({}) passes no gpu count or GPU attributes, so no GPU-model constraint is expected to apply.
const rows = await repository.findCandidates([unit({})], requirements());

expect(owners(rows)).toEqual(["akash1noGpu", "akash1withGpu"]);
});
});

describe("online filter", () => {
it("excludes rows where is_online is false", async () => {
await seed({ owner: "akash1up" });
Expand Down Expand Up @@ -211,6 +281,7 @@ describe(BidScreeningRepository.name, () => {
maxNodeFreeGpu?: bigint;
selfAttributes?: { key: string; value: string }[];
auditedBy?: string[];
gpuModels?: string[];
inventory?: unknown;
}

Expand All @@ -230,19 +301,20 @@ describe(BidScreeningRepository.name, () => {
maxNodeFreeGpu: input.maxNodeFreeGpu ?? 0n,
selfAttributes: input.selfAttributes ?? [],
auditedBy: input.auditedBy ?? [],
gpuModels: input.gpuModels ?? [],
inventory: input.inventory ?? { nodes: [], storage: {} }
});
}
});

function unit(input: { cpu?: bigint; memory?: bigint; gpu?: bigint; count?: number }): RequestedResourceUnit {
function unit(input: { cpu?: bigint; memory?: bigint; gpu?: bigint; count?: number; gpuAttributes?: ResourceAttribute[] }): RequestedResourceUnit {
return {
id: 1,
count: input.count ?? 1,
resources: {
cpu: { units: input.cpu ?? 0n, attributes: [] },
memory: { quantity: input.memory ?? 0n, attributes: [] },
gpu: { units: input.gpu ?? 0n, attributes: [] },
gpu: { units: input.gpu ?? 0n, attributes: input.gpuAttributes ?? [] },
storage: []
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ export class BidScreeningRepository {
gte(providerInventory.maxNodeFreeGpu, criteria.maxPerReplicaGpu)
];

for (const unit of criteria.units) {
if (unit.gpuTokens.length > 0) {
conditions.push(arrayOverlaps(providerInventory.gpuModels, unit.gpuTokens));
}
}

if (criteria.attributes.length > 0) {
conditions.push(sql`${providerInventory.selfAttributes} @> ${sql.param(criteria.attributes)}::jsonb`);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ export class ClusterInventoryMatcherService {
if (!attr) continue;

pinnedSpec = {
vendor: info.vendor.toLowerCase(),
vendor: info.vendor,
model: info.name,
ram: info.memorySize || null,
interface: info.interface || null
Expand Down
Loading