From ebc1de61203414aea5f7c217871858fb666ec468 Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Mon, 6 Apr 2026 09:42:48 -0400 Subject: [PATCH 1/6] core: Add script to sync DeepInfra models --- packages/core/script/generate-deepinfra.ts | 687 +++++++++++++++++++++ 1 file changed, 687 insertions(+) create mode 100644 packages/core/script/generate-deepinfra.ts diff --git a/packages/core/script/generate-deepinfra.ts b/packages/core/script/generate-deepinfra.ts new file mode 100644 index 000000000..963748285 --- /dev/null +++ b/packages/core/script/generate-deepinfra.ts @@ -0,0 +1,687 @@ +#!/usr/bin/env bun + +/** + * Generates DeepInfra model TOML files from the OpenAI-compatible API. + * + * Flags: + * --dry-run: Preview changes without writing files + * --new-only: Only create new models, skip updating existing ones + * --no-delete: Keep orphaned files instead of deleting them + */ + +import { z } from "zod"; +import path from "node:path"; +import { mkdir, unlink } from "node:fs/promises"; +import { ModelFamilyValues } from "../src/family.js"; + +const API_ENDPOINT = "https://api.deepinfra.com/v1/openai/models"; + +const PROVIDER_DENYLIST: string[] = [ + "BAAI", + "Bria", + "Clarity", + "ClarityAI", + "intfloat", + "sentence-transformers", + "shibing624", + "stabilityai", + "thenlper", + "Wan-AI", +]; + +const MODEL_REGEX_DENYLIST: RegExp[] = [ + /embed/i, + /(^|\/)FLUX/i, + /Seedream/i, + /Janus-Pro/i, + /p-image/i, + /Qwen-Image/i, +]; + +function shouldSkipModel(modelId: string): boolean { + const provider = modelId.split("/")[0]; + if (provider && PROVIDER_DENYLIST.includes(provider)) { + return true; + } + return MODEL_REGEX_DENYLIST.some((pattern) => pattern.test(modelId)); +} + +enum StubbedFields { + ReleaseDate = "release_date", + LastUpdated = "last_updated", +} + +const DeepInfraModel = z + .object({ + id: z.string(), + object: z.string(), + created: z.number(), + owned_by: z.string(), + root: z.string(), + parent: z.string().nullable(), + metadata: z 
+ .object({ + description: z.string().optional(), + context_length: z.number().optional(), + max_tokens: z.number().optional(), + pricing: z + .object({ + input_tokens: z.number().optional(), + output_tokens: z.number().optional(), + cache_read_tokens: z.number().optional(), + cache_write_tokens: z.number().optional(), + }) + .passthrough() + .optional(), + tags: z.array(z.string()).optional(), + }) + .passthrough() + .nullable(), + }) + .passthrough(); + +const DeepInfraResponse = z + .object({ + object: z.string(), + data: z.array(DeepInfraModel), + }) + .passthrough(); + +interface ExistingModel { + name?: string; + family?: string; + attachment?: boolean; + reasoning?: boolean; + tool_call?: boolean; + structured_output?: boolean; + temperature?: boolean; + knowledge?: string; + release_date?: string; + last_updated?: string; + open_weights?: boolean; + interleaved?: boolean | { field: string }; + status?: string; + cost?: { + input?: number; + output?: number; + cache_read?: number; + cache_write?: number; + }; + limit?: { + context?: number; + input?: number; + output?: number; + }; + modalities?: { + input?: string[]; + output?: string[]; + }; +} + +interface MergedModel { + name: string; + family?: string; + attachment: boolean; + reasoning: boolean; + tool_call: boolean; + structured_output?: boolean; + temperature: boolean; + knowledge?: string; + release_date: string; + last_updated: string; + open_weights: boolean; + interleaved?: boolean | { field: string }; + status?: string; + cost?: { + input: number; + output: number; + cache_read?: number; + cache_write?: number; + }; + limit: { + context: number; + input?: number; + output: number; + }; + modalities: { + input: string[]; + output: string[]; + }; +} + +interface Changes { + field: string; + oldValue: string; + newValue: string; +} + +function getTodayDate(): string { + return new Date().toISOString().slice(0, 10); +} + +function formatNumber(n: number): string { + if (n >= 1000) { + return 
n.toString().replace(/\B(?=(\d{3})+(?!\d))/g, "_"); + } + return n.toString(); +} + +function formatCost(n: number): string { + return n.toFixed(2); +} + +function isSubstring(target: string, family: string): boolean { + return target.toLowerCase().includes(family.toLowerCase()); +} + +function matchesFamily(target: string, family: string): boolean { + const targetLower = target.toLowerCase(); + const familyLower = family.toLowerCase(); + let familyIdx = 0; + + for ( + let i = 0; + i < targetLower.length && familyIdx < familyLower.length; + i++ + ) { + if (targetLower[i] === familyLower[familyIdx]) { + familyIdx++; + } + } + + return familyIdx === familyLower.length; +} + +function inferFamily(modelId: string): string | undefined { + const sortedFamilies = [...ModelFamilyValues].sort( + (a, b) => b.length - a.length, + ); + + for (const family of sortedFamilies) { + if (isSubstring(modelId, family)) { + return family; + } + } + + for (const family of sortedFamilies) { + if (matchesFamily(modelId, family)) { + return family; + } + } + + return undefined; +} + +function buildInputModalities( + metadata: z.infer["metadata"], +): string[] { + const mods: string[] = ["text"]; + const tags = new Set(metadata?.tags ?? []); + + if (tags.has("vision") || tags.has("image")) { + mods.push("image"); + } + if (tags.has("file-input")) { + mods.push("pdf"); + } + + return mods; +} + +function buildOutputModalities( + metadata: z.infer["metadata"], +): string[] { + const mods: string[] = ["text"]; + const tags = new Set(metadata?.tags ?? 
[]); + + if (tags.has("image-generation")) { + mods.push("image"); + } + + return mods; +} + +async function loadExistingModel( + filePath: string, +): Promise { + try { + const file = Bun.file(filePath); + if (!(await file.exists())) { + return null; + } + const toml = await import(filePath, { with: { type: "toml" } }).then( + (mod) => mod.default, + ); + return toml as ExistingModel; + } catch (e) { + console.warn(`Warning: Failed to parse existing file ${filePath}:`, e); + return null; + } +} + +function mergeModel( + apiModel: z.infer, + existing: ExistingModel | null, +): MergedModel { + const metadata = apiModel.metadata; + const tags = new Set(metadata?.tags ?? []); + + const modelName = apiModel.id.split("/").pop() || apiModel.id; + + const name = existing?.name ?? modelName; + const attachment = + existing?.attachment ?? (tags.has("vision") || tags.has("file-input")); + const reasoning = existing?.reasoning ?? tags.has("reasoning"); + const toolCall = existing?.tool_call ?? tags.has("tool-use"); + const openWeights = + existing?.open_weights ?? apiModel.owned_by !== "deepinfra"; + const family = existing?.family ?? inferFamily(apiModel.id); + const structuredOutput = existing?.structured_output; + const knowledge = existing?.knowledge; + const interleaved = existing?.interleaved; + const status = existing?.status; + + const releaseDate = existing?.release_date ?? getTodayDate(); + const lastUpdated = existing?.last_updated ?? getTodayDate(); + + const contextLimit = metadata?.context_length ?? 0; + const outputLimit = metadata?.max_tokens ?? 
contextLimit; + + const merged: MergedModel = { + name, + family, + attachment, + reasoning, + tool_call: toolCall, + temperature: true, + release_date: releaseDate, + last_updated: lastUpdated, + open_weights: openWeights, + ...(structuredOutput !== undefined && { + structured_output: structuredOutput, + }), + ...(knowledge && { knowledge }), + ...(interleaved !== undefined && { interleaved }), + ...(status && { status }), + limit: { + context: contextLimit, + ...(contextLimit > outputLimit && { input: contextLimit - outputLimit }), + output: outputLimit, + }, + modalities: { + input: buildInputModalities(metadata), + output: buildOutputModalities(metadata), + }, + }; + + if (metadata?.pricing) { + const pricing = metadata.pricing; + if ( + pricing.input_tokens !== undefined && + pricing.output_tokens !== undefined + ) { + merged.cost = { + input: pricing.input_tokens, + output: pricing.output_tokens, + ...(pricing.cache_read_tokens !== undefined && { + cache_read: pricing.cache_read_tokens, + }), + ...(pricing.cache_write_tokens !== undefined && { + cache_write: pricing.cache_write_tokens, + }), + }; + } + } + + return merged; +} + +function formatToml(model: MergedModel): string { + const lines: string[] = []; + + lines.push(`name = "${model.name.replace(/"/g, '\\"')}"`); + if (model.family) { + lines.push(`family = "${model.family}"`); + } + lines.push(`release_date = "${model.release_date}"`); + lines.push(`last_updated = "${model.last_updated}"`); + lines.push(`attachment = ${model.attachment}`); + lines.push(`reasoning = ${model.reasoning}`); + lines.push(`tool_call = ${model.tool_call}`); + if (model.structured_output !== undefined) { + lines.push(`structured_output = ${model.structured_output}`); + } + lines.push(`temperature = ${model.temperature}`); + if (model.knowledge) { + lines.push(`knowledge = "${model.knowledge}"`); + } + lines.push(`open_weights = ${model.open_weights}`); + if (model.status) { + lines.push(`status = "${model.status}"`); + } + + 
if (model.interleaved !== undefined) { + lines.push(""); + if (model.interleaved === true) { + lines.push(`interleaved = true`); + } else if (typeof model.interleaved === "object") { + lines.push(`[interleaved]`); + lines.push(`field = "${model.interleaved.field}"`); + } + } + + if (model.cost) { + lines.push(""); + lines.push(`[cost]`); + lines.push(`input = ${formatCost(model.cost.input)}`); + lines.push(`output = ${formatCost(model.cost.output)}`); + if (model.cost.cache_read !== undefined) { + lines.push(`cache_read = ${formatCost(model.cost.cache_read)}`); + } + if (model.cost.cache_write !== undefined) { + lines.push(`cache_write = ${formatCost(model.cost.cache_write)}`); + } + } + + lines.push(""); + lines.push(`[limit]`); + lines.push(`context = ${formatNumber(model.limit.context)}`); + if (model.limit.input !== undefined) { + lines.push(`input = ${formatNumber(model.limit.input)}`); + } + lines.push(`output = ${formatNumber(model.limit.output)}`); + + lines.push(""); + lines.push(`[modalities]`); + lines.push( + `input = [${model.modalities.input.map((m) => `"${m}"`).join(", ")}]`, + ); + lines.push( + `output = [${model.modalities.output.map((m) => `"${m}"`).join(", ")}]`, + ); + + return lines.join("\n") + "\n"; +} + +function detectChanges( + existing: ExistingModel | null, + merged: MergedModel, +): Changes[] { + if (!existing) return []; + + const changes: Changes[] = []; + + const shouldSkipStubbed = (field: string): boolean => { + return ( + field === StubbedFields.ReleaseDate || field === StubbedFields.LastUpdated + ); + }; + + const formatValue = (val: unknown, isCost = false): string => { + if (typeof val === "number") { + return isCost ? 
formatCost(val) : formatNumber(val); + } + if (Array.isArray(val)) return `[${val.join(", ")}]`; + if (val === undefined) return "(none)"; + return String(val); + }; + + const isMaterialPriceDiff = ( + oldPrice: unknown, + newPrice: unknown, + ): boolean => { + if (oldPrice === 0 && newPrice === undefined) return false; + + if (oldPrice !== undefined && newPrice !== undefined) { + return (oldPrice as number).toFixed(2) !== (newPrice as number).toFixed(2); + } + + return oldPrice !== newPrice; + }; + + const compare = (field: string, oldVal: unknown, newVal: unknown) => { + if (shouldSkipStubbed(field)) return; + + const isDiff = field.startsWith("cost.") + ? isMaterialPriceDiff(oldVal, newVal) + : JSON.stringify(oldVal) !== JSON.stringify(newVal); + + if (isDiff) { + const isCostField = field.startsWith("cost."); + changes.push({ + field, + oldValue: formatValue(oldVal, isCostField), + newValue: formatValue(newVal, isCostField), + }); + } + }; + + compare("name", existing.name, merged.name); + compare("family", existing.family, merged.family); + compare("attachment", existing.attachment, merged.attachment); + compare("reasoning", existing.reasoning, merged.reasoning); + compare("tool_call", existing.tool_call, merged.tool_call); + compare( + "structured_output", + existing.structured_output, + merged.structured_output, + ); + compare("open_weights", existing.open_weights, merged.open_weights); + compare("release_date", existing.release_date, merged.release_date); + compare("last_updated", existing.last_updated, merged.last_updated); + compare("cost.input", existing.cost?.input, merged.cost?.input); + compare("cost.output", existing.cost?.output, merged.cost?.output); + compare( + "cost.cache_read", + existing.cost?.cache_read, + merged.cost?.cache_read, + ); + compare( + "cost.cache_write", + existing.cost?.cache_write, + merged.cost?.cache_write, + ); + compare("limit.context", existing.limit?.context, merged.limit.context); + compare("limit.input", 
existing.limit?.input, merged.limit.input); + compare("limit.output", existing.limit?.output, merged.limit.output); + compare( + "modalities.input", + existing.modalities?.input, + merged.modalities.input, + ); + compare( + "modalities.output", + existing.modalities?.output, + merged.modalities.output, + ); + + return changes; +} + +async function main() { + const args = process.argv.slice(2); + const dryRun = args.includes("--dry-run"); + const newOnly = args.includes("--new-only"); + const noDelete = args.includes("--no-delete"); + + const modelsDir = path.join( + import.meta.dirname, + "..", + "..", + "..", + "providers", + "deepinfra", + "models", + ); + + console.log( + `${dryRun ? "[DRY RUN] " : ""}${newOnly ? "[NEW ONLY] " : ""}${noDelete ? "[NO DELETE] " : ""}Fetching DeepInfra models from API...`, + ); + + const res = await fetch(API_ENDPOINT); + if (!res.ok) { + console.error(`Failed to fetch API: ${res.status} ${res.statusText}`); + process.exit(1); + } + + const json = await res.json(); + const parsed = DeepInfraResponse.safeParse(json); + if (!parsed.success) { + console.error("Invalid API response:", parsed.error.errors); + process.exit(1); + } + + const apiModels = parsed.data.data; + + const existingFiles = new Set(); + try { + for await (const file of new Bun.Glob("**/*.toml").scan({ + cwd: modelsDir, + absolute: false, + })) { + existingFiles.add(file); + } + } catch {} + + console.log( + `Found ${apiModels.length} models in API, ${existingFiles.size} existing files\n`, + ); + + const apiModelIds = new Set(); + + let created = 0; + let updated = 0; + let unchanged = 0; + let deleted = 0; + let needsManualDates = 0; + + console.log("⚠️ WARNING: DeepInfra API returns incomplete metadata:"); + console.log( + " - created: 0 (stubbed) - release_date/last_updated need manual setting", + ); + console.log( + " - open_weights, reasoning, attachment are inferred from tags", + ); + console.log(" - Please verify these fields manually for new models\n"); + + 
for (const apiModel of apiModels) { + if (shouldSkipModel(apiModel.id)) { + continue; + } + + const relativePath = `${apiModel.id}.toml`; + const filePath = path.join(modelsDir, relativePath); + const dirPath = path.dirname(filePath); + + apiModelIds.add(relativePath); + + const existing = await loadExistingModel(filePath); + const merged = mergeModel(apiModel, existing); + const tomlContent = formatToml(merged); + + if (existing === null) { + created++; + if (apiModel.created === 0) { + needsManualDates++; + } + if (dryRun) { + console.log(`[DRY RUN] Would create: ${relativePath}`); + console.log(` name = "${merged.name}"`); + if (merged.family) { + console.log(` family = "${merged.family}" (inferred)`); + } + if (apiModel.created === 0) { + console.log( + ` ⚠️ release_date = "${merged.release_date}" (stubbed - needs manual update)`, + ); + console.log( + ` ⚠️ last_updated = "${merged.last_updated}" (stubbed - needs manual update)`, + ); + } + console.log(""); + } else { + await mkdir(dirPath, { recursive: true }); + await Bun.write(filePath, tomlContent); + console.log(`Created: ${relativePath}`); + if (apiModel.created === 0) { + console.log( + ` ⚠️ Please manually update release_date and last_updated`, + ); + } + } + } else { + if (newOnly) { + unchanged++; + continue; + } + + const changes = detectChanges(existing, merged); + + if (changes.length > 0) { + updated++; + if (dryRun) { + console.log(`[DRY RUN] Would update: ${relativePath}`); + } else { + await mkdir(dirPath, { recursive: true }); + await Bun.write(filePath, tomlContent); + console.log(`Updated: ${relativePath}`); + } + for (const change of changes) { + console.log( + ` ${change.field}: ${change.oldValue} → ${change.newValue}`, + ); + } + console.log(""); + } else { + unchanged++; + } + } + } + + const orphaned: string[] = []; + for (const file of existingFiles) { + if (!apiModelIds.has(file)) { + orphaned.push(file); + const filePath = path.join(modelsDir, file); + + if (noDelete) { + 
console.log(`Warning: Orphaned file (not in API): ${file}`); + } else if (dryRun) { + console.log(`[DRY RUN] Would delete: ${file}`); + } else { + try { + await unlink(filePath); + deleted++; + console.log(`Deleted: ${file}`); + } catch (e) { + console.warn(`Warning: Failed to delete ${file}:`, e); + } + } + } + } + + console.log(""); + const orphanedInfo = + noDelete && orphaned.length > 0 ? `, ${orphaned.length} orphaned` : ""; + if (dryRun) { + console.log( + `Summary: ${created} would be created (${needsManualDates} with stubbed dates), ${updated} would be updated, ${unchanged} unchanged, ${deleted} would be deleted${orphanedInfo}`, + ); + } else { + console.log( + `Summary: ${created} created (${needsManualDates} with stubbed dates), ${updated} updated, ${unchanged} unchanged, ${deleted} deleted${orphanedInfo}`, + ); + } + + if (created > 0) { + console.log("\n⚠️ IMPORTANT: Please manually review new models for:"); + console.log( + " - release_date and last_updated (API returns stubbed values)", + ); + console.log( + " - open_weights, reasoning, attachment (inferred from tags, may be inaccurate)", + ); + } +} + +await main(); From cb41a65d8f08b602c8b80d25af817ce273e37a55 Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Mon, 6 Apr 2026 09:43:11 -0400 Subject: [PATCH 2/6] deepinfra: Sync DeepInfra models --- .../deepinfra/models/ByteDance/Seed-1.8.toml | 22 ++++++++++++++++ .../models/ByteDance/Seed-2.0-mini.toml | 22 ++++++++++++++++ .../models/ByteDance/Seed-2.0-pro.toml | 22 ++++++++++++++++ .../models/Gryphe/MythoMax-L2-13b.toml | 21 +++++++++++++++ .../models/MiniMaxAI/MiniMax-M2.1.toml | 19 +++++++------- .../models/MiniMaxAI/MiniMax-M2.5.toml | 14 +++++----- .../models/MiniMaxAI/MiniMax-M2.toml | 26 ------------------- .../NousResearch/Hermes-3-Llama-3.1-405B.toml | 21 +++++++++++++++ .../NousResearch/Hermes-3-Llama-3.1-70B.toml | 21 +++++++++++++++ .../PaddlePaddle/PaddleOCR-VL-0.9B.toml | 21 +++++++++++++++ .../models/Qwen/Qwen2.5-72B-Instruct.toml 
| 21 +++++++++++++++ .../models/Qwen/Qwen2.5-VL-32B-Instruct.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3-14B.toml | 21 +++++++++++++++ .../Qwen/Qwen3-235B-A22B-Instruct-2507.toml | 21 +++++++++++++++ .../Qwen/Qwen3-235B-A22B-Thinking-2507.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-30B-A3B.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3-32B.toml | 21 +++++++++++++++ .../Qwen3-Coder-480B-A35B-Instruct-Turbo.toml | 13 +++++----- .../Qwen/Qwen3-Coder-480B-A35B-Instruct.toml | 12 ++++----- .../models/Qwen/Qwen3-Max-Thinking.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-Max.toml | 22 ++++++++++++++++ .../Qwen/Qwen3-Next-80B-A3B-Instruct.toml | 21 +++++++++++++++ .../Qwen/Qwen3-VL-235B-A22B-Instruct.toml | 22 ++++++++++++++++ .../Qwen/Qwen3-VL-30B-A3B-Instruct.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-0.8B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-122B-A10B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-27B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-2B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-35B-A3B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-397B-A17B.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-4B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-9B.toml | 22 ++++++++++++++++ .../models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 21 +++++++++++++++ .../L3.1-70B-Euryale-v2.2.toml} | 17 ++++++------ .../models/Sao10K/L3.3-70B-Euryale-v2.3.toml | 21 +++++++++++++++ .../models/allenai/Olmo-3.1-32B-Instruct.toml | 21 +++++++++++++++ .../models/allenai/olmOCR-2-7B-1025.toml | 21 +++++++++++++++ .../anthropic/claude-3-7-sonnet-latest.toml | 8 +++--- .../models/anthropic/claude-4-opus.toml | 8 +++--- .../models/anthropic/claude-4-sonnet.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-OCR.toml | 21 +++++++++++++++ .../deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 22 ++++++++++++++++ 
.../models/deepseek-ai/DeepSeek-R1-0528.toml | 9 ++++--- .../DeepSeek-R1-Distill-Llama-70B.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3-0324.toml | 23 ++++++++++++++++ .../deepseek-ai/DeepSeek-V3.1-Terminus.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.1.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.2.toml | 7 ++--- .../models/deepseek-ai/DeepSeek-V3.toml | 22 ++++++++++++++++ .../models/google/gemini-1.5-flash-8b.toml | 22 ++++++++++++++++ .../models/google/gemini-1.5-flash.toml | 22 ++++++++++++++++ .../models/google/gemini-2.5-flash.toml | 23 ++++++++++++++++ .../models/google/gemini-2.5-pro.toml | 23 ++++++++++++++++ .../models/google/gemma-3-12b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-27b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-4b-it.toml | 22 ++++++++++++++++ .../Llama-3.2-11B-Vision-Instruct.toml | 22 ++++++++++++++++ .../Llama-3.3-70B-Instruct-Turbo.toml | 3 ++- ...lama-4-Maverick-17B-128E-Instruct-FP8.toml | 6 +++-- .../Llama-4-Scout-17B-16E-Instruct.toml | 8 +++--- .../models/meta-llama/Llama-Guard-4-12B.toml | 22 ++++++++++++++++ .../meta-llama/Meta-Llama-3-8B-Instruct.toml | 21 +++++++++++++++ ...=> Meta-Llama-3.1-70B-Instruct-Turbo.toml} | 10 ++++--- ....toml => Meta-Llama-3.1-70B-Instruct.toml} | 9 ++++--- ... 
=> Meta-Llama-3.1-8B-Instruct-Turbo.toml} | 5 ++-- ...t.toml => Meta-Llama-3.1-8B-Instruct.toml} | 9 ++++--- .../deepinfra/models/microsoft/phi-4.toml | 22 ++++++++++++++++ .../mistralai/Mistral-Nemo-Instruct-2407.toml | 21 +++++++++++++++ .../Mistral-Small-24B-Instruct-2501.toml | 21 +++++++++++++++ .../Mistral-Small-3.2-24B-Instruct-2506.toml | 21 +++++++++++++++ .../mistralai/Mixtral-8x7B-Instruct-v0.1.toml | 22 ++++++++++++++++ .../moonshotai/Kimi-K2-Instruct-0905.toml | 9 +++---- .../models/moonshotai/Kimi-K2-Thinking.toml | 6 ++--- .../models/moonshotai/Kimi-K2.5-Turbo.toml | 23 ++++++++++++++++ .../models/moonshotai/Kimi-K2.5.toml | 18 ++++++------- .../Llama-3.1-Nemotron-70B-Instruct.toml | 21 +++++++++++++++ .../Llama-3.3-Nemotron-Super-49B-v1.5.toml | 21 +++++++++++++++ .../NVIDIA-Nemotron-3-Super-120B-A12B.toml | 23 ++++++++++++++++ .../NVIDIA-Nemotron-Nano-12B-v2-VL.toml | 22 ++++++++++++++++ .../nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 22 ++++++++++++++++ .../nvidia/Nemotron-3-Nano-30B-A3B.toml | 22 ++++++++++++++++ .../models/openai/gpt-oss-120b-Turbo.toml | 21 +++++++++++++++ .../deepinfra/models/openai/gpt-oss-120b.toml | 11 +++----- .../deepinfra/models/openai/gpt-oss-20b.toml | 7 ++--- .../models/stepfun-ai/Step-3.5-Flash.toml | 22 ++++++++++++++++ .../deepinfra/models/zai-org/GLM-4.5.toml | 26 ------------------- .../deepinfra/models/zai-org/GLM-4.6.toml | 13 +++++----- .../deepinfra/models/zai-org/GLM-4.6V.toml | 11 ++++---- .../models/zai-org/GLM-4.7-Flash.toml | 11 ++++---- .../deepinfra/models/zai-org/GLM-4.7.toml | 14 +++++----- providers/deepinfra/models/zai-org/GLM-5.toml | 14 +++++----- 91 files changed, 1502 insertions(+), 189 deletions(-) create mode 100644 providers/deepinfra/models/ByteDance/Seed-1.8.toml create mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml create mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml create mode 100644 providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml delete 
mode 100644 providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml create mode 100644 providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-14B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-32B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Max.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-27B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-2B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-4B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-9B.toml create mode 100644 providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml rename providers/deepinfra/models/{moonshotai/Kimi-K2-Instruct.toml => Sao10K/L3.1-70B-Euryale-v2.2.toml} (53%) 
create mode 100644 providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml create mode 100644 providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml create mode 100644 providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml create mode 100644 providers/deepinfra/models/anthropic/claude-4-sonnet.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml create mode 100644 providers/deepinfra/models/google/gemini-1.5-flash-8b.toml create mode 100644 providers/deepinfra/models/google/gemini-1.5-flash.toml create mode 100644 providers/deepinfra/models/google/gemini-2.5-flash.toml create mode 100644 providers/deepinfra/models/google/gemini-2.5-pro.toml create mode 100644 providers/deepinfra/models/google/gemma-3-12b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-27b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-4b-it.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml create mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml rename providers/deepinfra/models/meta-llama/{Llama-3.1-70B-Instruct.toml => Meta-Llama-3.1-70B-Instruct-Turbo.toml} (56%) rename providers/deepinfra/models/meta-llama/{Llama-3.1-70B-Instruct-Turbo.toml => Meta-Llama-3.1-70B-Instruct.toml} (61%) rename providers/deepinfra/models/meta-llama/{Llama-3.1-8B-Instruct-Turbo.toml => 
Meta-Llama-3.1-8B-Instruct-Turbo.toml} (76%) rename providers/deepinfra/models/meta-llama/{Llama-3.1-8B-Instruct.toml => Meta-Llama-3.1-8B-Instruct.toml} (61%) create mode 100644 providers/deepinfra/models/microsoft/phi-4.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml create mode 100644 providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml create mode 100644 providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml create mode 100644 providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml create mode 100644 providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml create mode 100644 providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml create mode 100644 providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml create mode 100644 providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml delete mode 100644 providers/deepinfra/models/zai-org/GLM-4.5.toml diff --git a/providers/deepinfra/models/ByteDance/Seed-1.8.toml b/providers/deepinfra/models/ByteDance/Seed-1.8.toml new file mode 100644 index 000000000..1b2a295ff --- /dev/null +++ b/providers/deepinfra/models/ByteDance/Seed-1.8.toml @@ -0,0 +1,22 @@ +name = "Seed 1.8" +family = "seed" +release_date = "2025-12-18" +last_updated = "2026-02-25" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.25 +output = 2.00 +cache_read = 0.05 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = 
["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml new file mode 100644 index 000000000..dd252063f --- /dev/null +++ b/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml @@ -0,0 +1,22 @@ +name = "Seed-2.0-mini" +family = "seed" +release_date = "2026-02-26" +last_updated = "2026-02-26" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.10 +output = 0.40 +cache_read = 0.02 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml new file mode 100644 index 000000000..6f8c1e024 --- /dev/null +++ b/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml @@ -0,0 +1,22 @@ +name = "Seed-2.0-pro" +family = "seed" +release_date = "2026-02-14" +last_updated = "2026-02-14" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.50 +output = 3.00 +cache_read = 0.10 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml new file mode 100644 index 000000000..473d3ebbc --- /dev/null +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -0,0 +1,21 @@ +name = "MythoMax 13B" +family = "o" +release_date = "2024-04-25" +last_updated = "2024-04-25" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.40 +output = 0.40 + +[limit] +context = 4_096 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml 
b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index a0b0ac5d7..c0896240f 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -1,25 +1,26 @@ name = "MiniMax M2.1" +family = "minimax" release_date = "2025-12-23" last_updated = "2025-12-23" attachment = false reasoning = true -temperature = true tool_call = true -open_weights = true +temperature = true knowledge = "2025-06" +open_weights = true + +[interleaved] +field = "reasoning_content" [cost] -input = 0.28 -output = 1.20 -cached_read = 0.14 +input = 0.27 +output = 0.95 +cache_read = 0.03 [limit] context = 196_608 output = 196_608 - + [modalities] input = ["text"] output = ["text"] - -[interleaved] -field = "reasoning_content" diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index bbbdbe470..163cf2b73 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -1,28 +1,26 @@ -# https://deepinfra.com/MiniMaxAI/MiniMax-M2.5 name = "MiniMax M2.5" family = "minimax" release_date = "2026-02-12" last_updated = "2026-02-12" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true knowledge = "2025-06" open_weights = true +[interleaved] +field = "reasoning_content" + [cost] input = 0.27 output = 0.95 cache_read = 0.03 -cache_write = 0.375 [limit] -context = 204_800 -output = 131_072 +context = 196_608 +output = 196_608 [modalities] input = ["text"] output = ["text"] - -[interleaved] -field = "reasoning_content" diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml deleted file mode 100644 index e726226c7..000000000 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml +++ /dev/null @@ -1,26 +0,0 @@ -name = "MiniMax M2" -family = "minimax" -release_date = "2025-11-13" -last_updated = 
"2025-11-13" -attachment = false -reasoning = true -temperature = true -tool_call = true -knowledge = "2024-10" -open_weights = true - -[interleaved] -field = "reasoning_content" - -[cost] -input = 0.254 -output = 1.02 -cached_input = 0.127 - -[limit] -context = 262_144 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml new file mode 100644 index 000000000..27aebb6a0 --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -0,0 +1,21 @@ +name = "Hermes 3 Llama 3.1 405B" +family = "nousresearch" +release_date = "2024-08-16" +last_updated = "2024-08-16" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 1.00 +output = 1.00 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml new file mode 100644 index 000000000..f9a61481e --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -0,0 +1,21 @@ +name = "Hermes 3 Llama 3.1 70B" +family = "nousresearch" +release_date = "2024-08-18" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.30 +output = 0.30 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml b/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml new file mode 100644 index 000000000..62a9150db --- /dev/null +++ b/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml @@ -0,0 +1,21 @@ +name = "PaddleOCR-VL-0.9B" +family = "o" 
+release_date = "2025-10-22" +last_updated = "2025-10-22" +attachment = true +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.14 +output = 0.80 + +[limit] +context = 16_384 +output = 16_384 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml new file mode 100644 index 000000000..83b19b273 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 2.5 72B Instruct" +family = "qwen" +release_date = "2024-09-18" +last_updated = "2025-11-25" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.12 +output = 0.39 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml new file mode 100644 index 000000000..f375659e6 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 2.5 VL 32B Instruct" +family = "qwen" +release_date = "2025-03-24" +last_updated = "2025-11-25" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.20 +output = 0.60 + +[limit] +context = 128_000 +output = 128_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml new file mode 100644 index 000000000..246d8a522 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 14B" +family = "qwen" +release_date = "2025-04-30" +last_updated = "2025-11-25" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + 
+[cost] +input = 0.12 +output = 0.24 + +[limit] +context = 40_960 +output = 40_960 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml new file mode 100644 index 000000000..bafeb56fd --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 235B A22B Instruct (2507)" +family = "qwen" +release_date = "2025-07-23" +last_updated = "2025-11-25" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.07 +output = 0.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml new file mode 100644 index 000000000..7b5be5295 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 235B A22B Thinking (2507)" +family = "qwen" +release_date = "2025-07-23" +last_updated = "2025-11-25" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.23 +output = 2.30 +cache_read = 0.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml new file mode 100644 index 000000000..ef8796562 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 30B A3B" +family = "qwen" +release_date = "2025-04-29" +last_updated = "2025-04-29" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 
40_960 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml new file mode 100644 index 000000000..a07153996 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 32B" +family = "qwen" +release_date = "2025-04-30" +last_updated = "2025-11-25" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 40_960 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 92f8cb540..6811d8732 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -1,22 +1,23 @@ -name = "Qwen3 Coder 480B A35B Instruct Turbo" +name = "Qwen 3 Coder 480B A35B Instruct Turbo" family = "qwen" release_date = "2025-07-23" last_updated = "2025-07-23" attachment = false reasoning = false +tool_call = true temperature = true knowledge = "2025-04" -tool_call = true open_weights = true [cost] -input = 0.3 -output = 1.2 +input = 0.22 +output = 1.00 +cache_read = 0.02 [limit] context = 262_144 -output = 66_536 +output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml index 08c0ed4ab..6d74e028f 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml @@ -1,22 +1,22 @@ -name = "Qwen3 Coder 480B A35B Instruct" +name = "Qwen 3 Coder 480B A35B Instruct" family = "qwen" 
release_date = "2025-07-23" last_updated = "2025-07-23" attachment = false reasoning = false +tool_call = true temperature = true knowledge = "2025-04" -tool_call = true open_weights = true [cost] -input = 0.4 -output = 1.6 +input = 0.40 +output = 1.60 [limit] context = 262_144 -output = 66_536 +output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml b/providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml new file mode 100644 index 000000000..8cc40b516 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 Max Thinking" +family = "qwen" +release_date = "2026-01-23" +last_updated = "2026-03-15" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 1.20 +output = 6.00 +cache_read = 0.24 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Max.toml b/providers/deepinfra/models/Qwen/Qwen3-Max.toml new file mode 100644 index 000000000..351e14eea --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Max.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 Max" +family = "qwen" +release_date = "2026-04-06" +last_updated = "2026-04-06" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 1.20 +output = 6.00 +cache_read = 0.24 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml new file mode 100644 index 000000000..97bba055b --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 Next 80B A3B Instruct" +family = "qwen" +release_date 
= "2025-09-11" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.09 +output = 1.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml new file mode 100644 index 000000000..4af5e30f5 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 VL 235B A22B Instruct" +family = "qwen" +release_date = "2025-09-23" +last_updated = "2026-01-10" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.20 +output = 0.88 +cache_read = 0.11 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml new file mode 100644 index 000000000..2e7e06e19 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 VL 30B A3B Instruct" +family = "qwen" +release_date = "2025-10-05" +last_updated = "2025-11-25" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml new file mode 100644 index 000000000..027ff8129 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 0.8B" +family = "qwen" +release_date = "2026-02-23" +last_updated = "2026-03-26" +attachment = true +reasoning = true 
+tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.01 +output = 0.05 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml new file mode 100644 index 000000000..460fc9b11 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 122B A10B" +family = "qwen" +release_date = "2026-02-26" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.29 +output = 2.90 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml new file mode 100644 index 000000000..45d119b1d --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 27B" +family = "qwen" +release_date = "2026-02-26" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.26 +output = 2.60 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml new file mode 100644 index 000000000..a8e8052af --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 2B" +family = "qwen" +release_date = "2026-03-02" +last_updated = "2026-03-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.02 +output = 0.10 + +[limit] +context 
= 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml new file mode 100644 index 000000000..cda3af4da --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 35B A3B" +family = "qwen" +release_date = "2026-02-26" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.22 +output = 2.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml new file mode 100644 index 000000000..0b748a722 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3.5 397B A17B" +family = "qwen" +release_date = "2026-02-15" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.54 +output = 3.40 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml new file mode 100644 index 000000000..adf9c0c4f --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 4B" +family = "qwen" +release_date = "2026-03-02" +last_updated = "2026-03-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.03 +output = 0.15 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git 
a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml new file mode 100644 index 000000000..ac1f3500c --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 9B" +family = "qwen" +release_date = "2026-03-10" +last_updated = "2026-03-15" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.04 +output = 0.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml new file mode 100644 index 000000000..2bdebc4f7 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -0,0 +1,21 @@ +name = "L3 8B Lunaris v1 Turbo" +family = "o" +release_date = "2024-08-13" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.04 +output = 0.05 + +[limit] +context = 8_192 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml similarity index 53% rename from providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml rename to providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml index 285310a44..7aeb7931b 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -1,21 +1,20 @@ -name = "Kimi K2" -family = "kimi" -release_date = "2025-07-11" -last_updated = "2025-07-11" +name = "L3.1 70B Euryale v2.2" +family = "o" +release_date = "2024-09-19" +last_updated = "2024-09-19" attachment = false reasoning = false -temperature = true tool_call = true -knowledge = 
"2024-10" +temperature = true open_weights = true [cost] -input = 0.50 -output = 2.00 +input = 0.85 +output = 0.85 [limit] context = 131_072 -output = 32_768 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml b/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml new file mode 100644 index 000000000..daccc1e8e --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml @@ -0,0 +1,21 @@ +name = "L3.3 70B Euryale v2.3" +family = "o" +release_date = "2024-12-06" +last_updated = "2024-12-06" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = false + +[cost] +input = 0.85 +output = 0.85 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml b/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml new file mode 100644 index 000000000..f9c5727ea --- /dev/null +++ b/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Olmo 3.1 32B Instruct" +family = "allenai" +release_date = "2026-01-07" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.20 +output = 0.60 + +[limit] +context = 65_536 +output = 65_536 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml b/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml new file mode 100644 index 000000000..fb09bf38a --- /dev/null +++ b/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml @@ -0,0 +1,21 @@ +name = "olmOCR-2-7B-1025" +family = "allenai" +release_date = "2025-10-22" +last_updated = "2025-10-22" +attachment = true +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.09 +output = 0.19 + +[limit] +context = 
16_384 +output = 16_384 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml index 28d93a4fb..2aa134f79 100644 --- a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml +++ b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml @@ -4,19 +4,19 @@ release_date = "2025-03-13" last_updated = "2025-03-13" attachment = true reasoning = true +tool_call = true temperature = true knowledge = "2024-10-31" -tool_call = true open_weights = false [cost] -input = 3.3 -output = 16.5 +input = 3.30 +output = 16.50 cache_read = 0.33 [limit] context = 200_000 -output = 64_000 +output = 200_000 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/anthropic/claude-4-opus.toml b/providers/deepinfra/models/anthropic/claude-4-opus.toml index 51f441d78..f481ff1f9 100644 --- a/providers/deepinfra/models/anthropic/claude-4-opus.toml +++ b/providers/deepinfra/models/anthropic/claude-4-opus.toml @@ -4,18 +4,18 @@ release_date = "2025-06-12" last_updated = "2025-06-12" attachment = true reasoning = true +tool_call = true temperature = true knowledge = "2025-03-31" -tool_call = true open_weights = false [cost] -input = 16.5 -output = 82.5 +input = 16.50 +output = 82.50 [limit] context = 200_000 -output = 32_000 +output = 200_000 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/anthropic/claude-4-sonnet.toml b/providers/deepinfra/models/anthropic/claude-4-sonnet.toml new file mode 100644 index 000000000..494f59682 --- /dev/null +++ b/providers/deepinfra/models/anthropic/claude-4-sonnet.toml @@ -0,0 +1,22 @@ +name = "Claude 4 Sonnet" +family = "claude" +release_date = "2025-05-22" +last_updated = "2025-05-22" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2025-03-31" +open_weights = false + +[cost] +input = 3.30 
+output = 16.50 + +[limit] +context = 200_000 +output = 200_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml new file mode 100644 index 000000000..20fd7c6fe --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml @@ -0,0 +1,21 @@ +name = "DeepSeek OCR" +family = "deepseek" +release_date = "2024-12-13" +last_updated = "2025-01-15" +attachment = true +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.03 +output = 0.10 + +[limit] +context = 8_192 +output = 8_192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml new file mode 100644 index 000000000..455a2b95c --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -0,0 +1,22 @@ +name = "DeepSeek R1 0528 Turbo" +family = "deepseek" +release_date = "2025-05-28" +last_updated = "2025-05-28" +attachment = false +reasoning = true +tool_call = false +temperature = true +knowledge = "2025-03" +open_weights = false + +[cost] +input = 1.00 +output = 3.00 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml index 2733a6938..5546f2193 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml @@ -1,24 +1,25 @@ -name = "DeepSeek-R1-0528" +name = "DeepSeek R1 0528" +family = "deepseek" release_date = "2025-05-28" last_updated = "2025-05-28" attachment = false reasoning = true +tool_call = true temperature = true knowledge = "2024-07" -tool_call = false open_weights = 
false [interleaved] field = "reasoning_content" [cost] -input = 0.5 +input = 0.50 output = 2.15 cache_read = 0.35 [limit] context = 163_840 -output = 64_000 +output = 163_840 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml new file mode 100644 index 000000000..4f3cb97cc --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -0,0 +1,22 @@ +name = "DeepSeek R1 Distill Llama 70B" +family = "deepseek" +release_date = "2025-01-20" +last_updated = "2025-01-20" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-07" +open_weights = true + +[cost] +input = 0.70 +output = 0.80 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml new file mode 100644 index 000000000..c84aaf23b --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -0,0 +1,23 @@ +name = "DeepSeek V3 0324" +family = "deepseek" +release_date = "2025-03-24" +last_updated = "2025-03-24" +attachment = false +reasoning = false +tool_call = true +temperature = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.20 +output = 0.77 +cache_read = 0.14 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml new file mode 100644 index 000000000..3c808740d --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -0,0 +1,22 @@ +name = "DeepSeek V3.1 Terminus" +family = "deepseek" +release_date = "2025-09-22" +last_updated = "2025-10-15" +attachment 
= false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml new file mode 100644 index 000000000..975c04df2 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -0,0 +1,22 @@ +name = "DeepSeek V3.1" +family = "deepseek" +release_date = "2025-08-21" +last_updated = "2025-08-21" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml index 74d5e7d60..47e3f1085 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml @@ -1,11 +1,12 @@ -name = "DeepSeek-V3.2" +name = "DeepSeek V3.2" +family = "deepseek" release_date = "2025-12-02" last_updated = "2025-12-02" attachment = false reasoning = true +tool_call = true temperature = true knowledge = "2024-12" -tool_call = true open_weights = false [interleaved] @@ -18,7 +19,7 @@ cache_read = 0.13 [limit] context = 163_840 -output = 64_000 +output = 163_840 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml new file mode 100644 index 000000000..b2a43d543 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -0,0 +1,22 @@ +name = "DeepSeek V3" +family = "deepseek" +release_date = "2024-12-26" +last_updated = "2025-01-20" +attachment = false +reasoning = false 
+tool_call = true +temperature = true +knowledge = "2024-07" +open_weights = true + +[cost] +input = 0.32 +output = 0.89 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml b/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml new file mode 100644 index 000000000..465ac3578 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml @@ -0,0 +1,22 @@ +name = "Gemini 1.5 Flash 8B" +family = "gemini-flash" +release_date = "2024-05-14" +last_updated = "2024-05-14" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2024-04" +open_weights = false + +[cost] +input = 0.04 +output = 0.15 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash.toml b/providers/deepinfra/models/google/gemini-1.5-flash.toml new file mode 100644 index 000000000..a9904cb76 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-1.5-flash.toml @@ -0,0 +1,22 @@ +name = "Gemini 1.5 Flash" +family = "gemini-flash" +release_date = "2024-05-14" +last_updated = "2024-05-14" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2024-04" +open_weights = false + +[cost] +input = 0.07 +output = 0.30 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-flash.toml b/providers/deepinfra/models/google/gemini-2.5-flash.toml new file mode 100644 index 000000000..4867736b7 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-2.5-flash.toml @@ -0,0 +1,23 @@ +name = "Gemini 2.5 Flash" +family = "gemini-flash" +release_date = "2025-03-20" +last_updated = "2025-06-05" +attachment = true +reasoning = true +temperature = true +knowledge = "2025-01" 
+tool_call = true +structured_output = true +open_weights = false + +[cost] +input = 0.30 +output = 2.50 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-pro.toml b/providers/deepinfra/models/google/gemini-2.5-pro.toml new file mode 100644 index 000000000..3ad893819 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-2.5-pro.toml @@ -0,0 +1,23 @@ +name = "Gemini 2.5 Pro" +family = "gemini-pro" +release_date = "2025-03-20" +last_updated = "2025-06-05" +attachment = true +reasoning = true +temperature = true +knowledge = "2025-01" +tool_call = true +structured_output = true +open_weights = false + +[cost] +input = 1.25 +output = 10.00 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml new file mode 100644 index 000000000..dc03b0c68 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -0,0 +1,23 @@ +name = "Gemma 3 12B" +family = "gemma" +release_date = "2025-03-13" +last_updated = "2025-03-13" +attachment = true +reasoning = false +temperature = true +knowledge = "2024-10" +tool_call = false +structured_output = true +open_weights = true + +[cost] +input = 0.04 +output = 0.13 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml new file mode 100644 index 000000000..196f4a12f --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -0,0 +1,23 @@ +name = "Gemma 3 27B" +family = "gemma" +release_date = "2025-03-12" +last_updated = "2025-03-12" +attachment = true +reasoning = false +temperature = true +knowledge = "2024-10" +tool_call = 
true +structured_output = true +open_weights = true + +[cost] +input = 0.08 +output = 0.16 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml new file mode 100644 index 000000000..5b5c634d3 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -0,0 +1,22 @@ +name = "Gemma 3 4B" +family = "gemma" +release_date = "2025-03-13" +last_updated = "2025-03-13" +attachment = true +reasoning = false +temperature = true +knowledge = "2024-10" +tool_call = false +open_weights = true + +[cost] +input = 0.04 +output = 0.08 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml new file mode 100644 index 000000000..e5df00a48 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -0,0 +1,22 @@ +name = "Llama 3.2 11B Vision Instruct" +family = "llama" +release_date = "2024-09-25" +last_updated = "2024-09-25" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2023-12" +open_weights = true + +[cost] +input = 0.05 +output = 0.05 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml index df433558b..239dab88f 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml @@ -5,6 +5,7 @@ last_updated = "2024-12-06" attachment = false reasoning = false tool_call = true +temperature = true 
open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.32 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml index 1a2b88bbc..ff9c6f29a 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml @@ -5,6 +5,8 @@ last_updated = "2025-04-05" attachment = false reasoning = false tool_call = true +temperature = true +knowledge = "2024-08" open_weights = true [cost] @@ -12,8 +14,8 @@ input = 0.15 output = 0.60 [limit] -context = 1_000_000 -output = 16_384 +context = 1_048_576 +output = 1_048_576 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index 8f7bab10f..751f9d9d2 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -2,9 +2,11 @@ name = "Llama 4 Scout 17B" family = "llama" release_date = "2025-04-05" last_updated = "2025-04-05" -attachment = false +attachment = true reasoning = false tool_call = true +temperature = true +knowledge = "2024-08" open_weights = true [cost] @@ -12,8 +14,8 @@ input = 0.08 output = 0.30 [limit] -context = 10_000_000 -output = 16_384 +context = 327_680 +output = 327_680 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml new file mode 100644 index 000000000..d911f9f9d --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -0,0 +1,22 @@ +name = "Meta Llama Guard 4 12B" +family = "llama" 
+release_date = "2025-01-01" +last_updated = "2025-01-01" +attachment = false +reasoning = false +temperature = true +tool_call = false +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.18 +output = 0.18 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml new file mode 100644 index 000000000..ccf9dec57 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Meta-Llama-3-8B-Instruct" +family = "llama" +release_date = "2025-04-03" +last_updated = "2025-04-03" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.03 +output = 0.04 + +[limit] +context = 8_192 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml similarity index 56% rename from providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml index 2edbcb221..6246f9cad 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml @@ -1,10 +1,12 @@ -name = "Llama 3.1 70B" +name = "Meta-Llama-3.1-70B-Instruct-Turbo" family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" +release_date = "2024-12-06" +last_updated = "2024-12-06" attachment = false reasoning = false tool_call = true +temperature = true +knowledge = "2023-12" open_weights = true [cost] @@ -13,7 +15,7 @@ output = 0.40 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git 
a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml similarity index 61% rename from providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml index 79e674844..8922b133d 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml @@ -1,10 +1,11 @@ -name = "Llama 3.1 70B Turbo" +name = "Meta-Llama-3.1-70B-Instruct" family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" +release_date = "2024-04-17" +last_updated = "2024-04-17" attachment = false reasoning = false tool_call = true +temperature = true open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.40 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml similarity index 76% rename from providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml index 3646a2d47..dd4c188c4 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml @@ -1,10 +1,11 @@ -name = "Llama 3.1 8B Turbo" +name = "Meta Llama 3.1 8B Instruct Turbo" family = "llama" release_date = "2024-07-23" last_updated = "2024-07-23" attachment = false reasoning = false tool_call = true +temperature = true open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.03 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml 
b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml similarity index 61% rename from providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml index ec3539991..118a03591 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml @@ -1,10 +1,11 @@ -name = "Llama 3.1 8B" +name = "Meta-Llama-3.1-8B-Instruct" family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" +release_date = "2025-06-11" +last_updated = "2025-06-11" attachment = false reasoning = false tool_call = true +temperature = true open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.05 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml new file mode 100644 index 000000000..8935e609b --- /dev/null +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -0,0 +1,22 @@ +name = "Phi-4" +family = "phi" +release_date = "2024-12-11" +last_updated = "2024-12-11" +attachment = false +reasoning = false +temperature = true +knowledge = "2023-10" +tool_call = false +open_weights = true + +[cost] +input = 0.07 +output = 0.14 + +[limit] +context = 16_384 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml new file mode 100644 index 000000000..8b2a91488 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -0,0 +1,21 @@ +name = "Mistral Nemo Instruct 2407" +family = "mistral-nemo" +release_date = "2024-07-25" +last_updated = "2026-03-17" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] 
+input = 0.02 +output = 0.04 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml new file mode 100644 index 000000000..6b518c5b2 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -0,0 +1,21 @@ +name = "Mistral Small 24B Instruct (2501)" +family = "mistral-small" +release_date = "2025-12-29" +last_updated = "2026-01-10" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.05 +output = 0.08 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml new file mode 100644 index 000000000..e800ca8d8 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -0,0 +1,21 @@ +name = "Mistral Small 3.2 24B Instruct (2506)" +family = "mistral-small" +release_date = "2025-06-20" +last_updated = "2026-03-17" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.07 +output = 0.20 + +[limit] +context = 128_000 +output = 128_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml b/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml new file mode 100644 index 000000000..8224aa953 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml @@ -0,0 +1,22 @@ +name = "Mixtral 8x7B Instruct v0.1" +family = "mixtral" +release_date = "2023-12-11" +last_updated = "2023-12-11" +attachment = false +reasoning = false +tool_call 
= true +temperature = true +knowledge = "2024-01" +open_weights = true + +[cost] +input = 0.54 +output = 0.54 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml index 180430bf1..ef0c0b0f4 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml @@ -1,12 +1,11 @@ -# https://deepinfra.com/moonshotai/Kimi-K2-Instruct-0905 name = "Kimi K2 0905" family = "kimi" release_date = "2025-09-05" last_updated = "2025-09-05" attachment = false reasoning = false -temperature = true tool_call = true +temperature = true knowledge = "2024-10" open_weights = true @@ -16,9 +15,9 @@ output = 2.00 cache_read = 0.15 [limit] -context = 262_144 -output = 262_144 +context = 131_072 +output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index b455e5568..3e89d3257 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -4,8 +4,8 @@ release_date = "2025-11-06" last_updated = "2025-11-07" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true knowledge = "2024-10" open_weights = true @@ -15,11 +15,11 @@ field = "reasoning_content" [cost] input = 0.47 output = 2.00 -cached_input = 0.141 +cache_read = 0.14 [limit] context = 131_072 -output = 32_768 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml new file mode 100644 index 000000000..3d9573493 --- /dev/null +++ 
b/providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml @@ -0,0 +1,23 @@ +name = "Kimi K2.5 Turbo" +family = "kimi" +release_date = "2026-01-27" +last_updated = "2026-01-27" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.60 +output = 3.00 +cache_read = 0.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index 84183d853..c8c951a03 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -4,24 +4,24 @@ release_date = "2026-01-27" last_updated = "2026-01-27" attachment = true reasoning = true +tool_call = true structured_output = true temperature = true -tool_call = true knowledge = "2025-01" open_weights = true +[interleaved] +field = "reasoning_content" + [cost] -input = 0.50 -output = 2.80 -cached_input = 0.09 +input = 0.45 +output = 2.25 +cache_read = 0.07 [limit] context = 262_144 -output = 32_768 +output = 262_144 [modalities] -input = ["text", "image", "video"] +input = ["text", "image"] output = ["text"] - -[interleaved] -field = "reasoning_content" \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml b/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml new file mode 100644 index 000000000..beab01a63 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Llama 3.1 Nemotron 70B Instruct" +family = "nemotron" +release_date = "2024-10-12" +last_updated = "2024-10-12" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 1.20 +output = 1.20 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] 
+output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml new file mode 100644 index 000000000..b7ebafde8 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -0,0 +1,21 @@ +name = "Llama 3.3 Nemotron Super 49b V1.5" +family = "nemotron" +release_date = "2025-03-16" +last_updated = "2025-03-16" +attachment = false +reasoning = true +tool_call = false +temperature = true +open_weights = false + +[cost] +input = 0.10 +output = 0.40 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml new file mode 100644 index 000000000..35cc44e85 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -0,0 +1,23 @@ +name = "Nemotron 3 Super 120B A12B" +family = "nemotron" +release_date = "2026-03-11" +last_updated = "2026-03-11" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-04" +open_weights = true + +[cost] +input = 0.10 +output = 0.50 +cache_read = 0.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml new file mode 100644 index 000000000..ca27dc226 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml @@ -0,0 +1,22 @@ +name = "NVIDIA-Nemotron-Nano-12B-v2-VL" +family = "nemotron" +release_date = "2025-03-15" +last_updated = "2026-02-04" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.20 +output = 
0.60 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml new file mode 100644 index 000000000..560d065d3 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -0,0 +1,22 @@ +name = "Nemotron Nano 9B V2" +family = "nemotron" +release_date = "2025-08-18" +last_updated = "2025-08-18" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-09" +open_weights = true + +[cost] +input = 0.04 +output = 0.16 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml new file mode 100644 index 000000000..af802fa8b --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -0,0 +1,22 @@ +name = "Nemotron 3 Nano 30B A3B" +family = "nemotron" +release_date = "2024-12" +last_updated = "2024-12" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-09" +open_weights = true + +[cost] +input = 0.05 +output = 0.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml new file mode 100644 index 000000000..854e2e75b --- /dev/null +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -0,0 +1,21 @@ +name = "GPT OSS 120B Turbo" +family = "gpt-oss" +release_date = "2025-08-05" +last_updated = "2025-08-05" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 131_072 +output = 
131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 0889e79c2..0ecf093ef 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -1,23 +1,20 @@ -# https://deepinfra.com/openai/gpt-oss-120b - name = "GPT OSS 120B" family = "gpt-oss" release_date = "2025-08-05" last_updated = "2025-08-05" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true open_weights = true [cost] -input = 0.05 -output = 0.24 +input = 0.04 +output = 0.19 [limit] context = 131_072 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-20b.toml b/providers/deepinfra/models/openai/gpt-oss-20b.toml index 9342fee14..4f84265ea 100644 --- a/providers/deepinfra/models/openai/gpt-oss-20b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-20b.toml @@ -1,13 +1,11 @@ -# https://deepinfra.com/openai/gpt-oss-20b - name = "GPT OSS 20B" family = "gpt-oss" release_date = "2025-08-05" last_updated = "2025-08-05" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true open_weights = true [cost] @@ -16,8 +14,7 @@ output = 0.14 [limit] context = 131_072 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml new file mode 100644 index 000000000..4e5ca5d74 --- /dev/null +++ b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -0,0 +1,22 @@ +name = "Step-3.5-Flash" +family = "step" +release_date = "2026-04-06" +last_updated = "2026-04-06" +attachment = false +reasoning = false +tool_call = false +temperature = true 
+open_weights = false + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.5.toml b/providers/deepinfra/models/zai-org/GLM-4.5.toml deleted file mode 100644 index ae5ae7ec1..000000000 --- a/providers/deepinfra/models/zai-org/GLM-4.5.toml +++ /dev/null @@ -1,26 +0,0 @@ -name = "GLM-4.5" -family = "glm" -release_date = "2025-07-28" -last_updated = "2025-07-28" -attachment = false -reasoning = false -temperature = true -tool_call = true -knowledge = "2025-04" -open_weights = true - -# https://deepinfra.com/zai-org/GLM-4.5 -# It is now being redirected to GLM-4.6 -status = "deprecated" - -[cost] -input = 0.60 -output = 2.20 - -[limit] -context = 131_072 -output = 98_304 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml index 63eb5b3f7..693cb68ad 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6.toml @@ -1,13 +1,12 @@ -# https://deepinfra.com/zai-org/GLM-4.6 -name = "GLM-4.6" +name = "GLM 4.6" family = "glm" release_date = "2025-09-30" last_updated = "2025-09-30" -knowledge = "2025-04" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true [interleaved] @@ -19,9 +18,9 @@ output = 1.74 cache_read = 0.08 [limit] -context = 204_800 -output = 131_072 +context = 202_752 +output = 202_752 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6V.toml b/providers/deepinfra/models/zai-org/GLM-4.6V.toml index b3f6ce288..d91baba63 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6V.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6V.toml @@ -1,13 +1,12 @@ -# 
https://deepinfra.com/zai-org/GLM-4.6V -name = "GLM-4.6V" +name = "GLM 4.6V" family = "glm" release_date = "2025-09-30" last_updated = "2025-09-30" -knowledge = "2025-04" attachment = true reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true [interleaved] @@ -18,9 +17,9 @@ input = 0.30 output = 0.90 [limit] -context = 204_800 +context = 131_072 output = 131_072 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index dbb42a2d8..0be4ef395 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -1,13 +1,12 @@ -# https://deepinfra.com/zai-org/GLM-4.7-Flash -name = "GLM-4.7-Flash" +name = "GLM 4.7 Flash" family = "glm-flash" release_date = "2026-01-19" last_updated = "2026-01-19" -knowledge = "2025-04" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true [interleaved] @@ -16,11 +15,11 @@ field = "reasoning_content" [cost] input = 0.06 output = 0.40 +cache_read = 0.01 [limit] context = 202_752 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +output = 202_752 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 76b66a0c5..ffc25c089 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -1,27 +1,25 @@ -# https://deepinfra.com/zai-org/GLM-4.7 -name = "GLM-4.7" +name = "GLM 4.7" family = "glm" release_date = "2025-12-22" last_updated = "2025-12-22" -knowledge = "2025-04" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true 
[interleaved] field = "reasoning_content" [cost] -input = 0.43 +input = 0.40 output = 1.75 cache_read = 0.08 [limit] -context = 202_752 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +context = 202_752 +output = 202_752 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index 257b2c673..5bcd66e73 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -1,27 +1,25 @@ -# https://deepinfra.com/zai-org/GLM-5 -name = "GLM-5" +name = "GLM 5" family = "glm" release_date = "2026-02-12" last_updated = "2026-02-12" -knowledge = "2025-12" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-12" open_weights = true [interleaved] field = "reasoning_content" [cost] -input = 0.8 +input = 0.80 output = 2.56 cache_read = 0.16 [limit] -context = 202_752 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +context = 202_752 +output = 202_752 [modalities] input = ["text"] From a7a4bd301dc6908f66ece8e8d45ada37562bc8ca Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Tue, 7 Apr 2026 09:48:14 -0400 Subject: [PATCH 3/6] Be more selective about models --- packages/core/script/generate-deepinfra.ts | 43 +++++++++++++------ .../deepinfra/models/ByteDance/Seed-1.8.toml | 22 ---------- .../models/ByteDance/Seed-2.0-mini.toml | 22 ---------- .../models/ByteDance/Seed-2.0-pro.toml | 22 ---------- .../models/Gryphe/MythoMax-L2-13b.toml | 21 --------- .../NousResearch/Hermes-3-Llama-3.1-405B.toml | 21 --------- .../NousResearch/Hermes-3-Llama-3.1-70B.toml | 21 --------- .../PaddlePaddle/PaddleOCR-VL-0.9B.toml | 21 --------- .../models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 21 --------- .../models/Sao10K/L3.1-70B-Euryale-v2.2.toml | 21 --------- .../models/Sao10K/L3.3-70B-Euryale-v2.3.toml | 21 --------- .../models/allenai/Olmo-3.1-32B-Instruct.toml | 
21 --------- .../models/allenai/olmOCR-2-7B-1025.toml | 21 --------- .../deepinfra/models/microsoft/phi-4.toml | 22 ---------- .../mistralai/Mistral-Nemo-Instruct-2407.toml | 21 --------- .../Mistral-Small-24B-Instruct-2501.toml | 21 --------- .../Mistral-Small-3.2-24B-Instruct-2506.toml | 21 --------- .../mistralai/Mixtral-8x7B-Instruct-v0.1.toml | 22 ---------- 18 files changed, 29 insertions(+), 376 deletions(-) delete mode 100644 providers/deepinfra/models/ByteDance/Seed-1.8.toml delete mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml delete mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml delete mode 100644 providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml delete mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml delete mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml delete mode 100644 providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml delete mode 100644 providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml delete mode 100644 providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml delete mode 100644 providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml delete mode 100644 providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml delete mode 100644 providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml delete mode 100644 providers/deepinfra/models/microsoft/phi-4.toml delete mode 100644 providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml delete mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml delete mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml delete mode 100644 providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml diff --git a/packages/core/script/generate-deepinfra.ts b/packages/core/script/generate-deepinfra.ts index 963748285..af380b1c9 100644 --- a/packages/core/script/generate-deepinfra.ts +++ 
b/packages/core/script/generate-deepinfra.ts @@ -16,33 +16,46 @@ import { ModelFamilyValues } from "../src/family.js"; const API_ENDPOINT = "https://api.deepinfra.com/v1/openai/models"; -const PROVIDER_DENYLIST: string[] = [ - "BAAI", - "Bria", - "Clarity", - "ClarityAI", - "intfloat", - "sentence-transformers", - "shibing624", - "stabilityai", - "thenlper", - "Wan-AI", +// Allowlist of providers to include - only these providers will be tracked +// This is intentionally restrictive since most models shouldn't be included +const PROVIDER_ALLOWLIST: string[] = [ + "anthropic", + "deepseek-ai", + "google", + "meta-llama", + "MiniMaxAI", + "moonshotai", + "nvidia", + "openai", + "Qwen", + "stepfun-ai", + "zai-org", ]; +// Models/patterns to skip even from allowed providers (embeddings, image gen, etc.) const MODEL_REGEX_DENYLIST: RegExp[] = [ + // Avoid most embedding models /embed/i, + /(^|\/)FLUX/i, - /Seedream/i, /Janus-Pro/i, /p-image/i, + + // Avoid any Qwen image generation models /Qwen-Image/i, + + // Qwen 2.5 models are obsolete + /Qwen2.5/i, + /Seedream/i, ]; function shouldSkipModel(modelId: string): boolean { const provider = modelId.split("/")[0]; - if (provider && PROVIDER_DENYLIST.includes(provider)) { + // Skip if provider is not in the allowlist + if (!provider || !PROVIDER_ALLOWLIST.includes(provider)) { return true; } + // Also skip models matching excluded patterns (embeddings, image gen, etc.) 
return MODEL_REGEX_DENYLIST.some((pattern) => pattern.test(modelId)); } @@ -433,7 +446,9 @@ function detectChanges( if (oldPrice === 0 && newPrice === undefined) return false; if (oldPrice !== undefined && newPrice !== undefined) { - return (oldPrice as number).toFixed(2) !== (newPrice as number).toFixed(2); + return ( + (oldPrice as number).toFixed(2) !== (newPrice as number).toFixed(2) + ); } return oldPrice !== newPrice; diff --git a/providers/deepinfra/models/ByteDance/Seed-1.8.toml b/providers/deepinfra/models/ByteDance/Seed-1.8.toml deleted file mode 100644 index 1b2a295ff..000000000 --- a/providers/deepinfra/models/ByteDance/Seed-1.8.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Seed 1.8" -family = "seed" -release_date = "2025-12-18" -last_updated = "2026-02-25" -attachment = true -reasoning = true -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 0.25 -output = 2.00 -cache_read = 0.05 - -[limit] -context = 256_000 -output = 256_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml deleted file mode 100644 index dd252063f..000000000 --- a/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Seed-2.0-mini" -family = "seed" -release_date = "2026-02-26" -last_updated = "2026-02-26" -attachment = true -reasoning = true -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 0.10 -output = 0.40 -cache_read = 0.02 - -[limit] -context = 256_000 -output = 256_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml deleted file mode 100644 index 6f8c1e024..000000000 --- a/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Seed-2.0-pro" -family = "seed" 
-release_date = "2026-02-14" -last_updated = "2026-02-14" -attachment = true -reasoning = true -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 0.50 -output = 3.00 -cache_read = 0.10 - -[limit] -context = 256_000 -output = 256_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml deleted file mode 100644 index 473d3ebbc..000000000 --- a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "MythoMax 13B" -family = "o" -release_date = "2024-04-25" -last_updated = "2024-04-25" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.40 -output = 0.40 - -[limit] -context = 4_096 -output = 4_096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml deleted file mode 100644 index 27aebb6a0..000000000 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Hermes 3 Llama 3.1 405B" -family = "nousresearch" -release_date = "2024-08-16" -last_updated = "2024-08-16" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 1.00 -output = 1.00 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml deleted file mode 100644 index f9a61481e..000000000 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Hermes 3 Llama 3.1 70B" -family = "nousresearch" -release_date = "2024-08-18" -last_updated = 
"2026-03-15" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.30 -output = 0.30 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml b/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml deleted file mode 100644 index 62a9150db..000000000 --- a/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "PaddleOCR-VL-0.9B" -family = "o" -release_date = "2025-10-22" -last_updated = "2025-10-22" -attachment = true -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.14 -output = 0.80 - -[limit] -context = 16_384 -output = 16_384 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml deleted file mode 100644 index 2bdebc4f7..000000000 --- a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "L3 8B Lunaris v1 Turbo" -family = "o" -release_date = "2024-08-13" -last_updated = "2026-03-15" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.04 -output = 0.05 - -[limit] -context = 8_192 -output = 8_192 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml deleted file mode 100644 index 7aeb7931b..000000000 --- a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "L3.1 70B Euryale v2.2" -family = "o" -release_date = "2024-09-19" -last_updated = "2024-09-19" -attachment = false -reasoning = false -tool_call = true -temperature = true 
-open_weights = true - -[cost] -input = 0.85 -output = 0.85 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml b/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml deleted file mode 100644 index daccc1e8e..000000000 --- a/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "L3.3 70B Euryale v2.3" -family = "o" -release_date = "2024-12-06" -last_updated = "2024-12-06" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = false - -[cost] -input = 0.85 -output = 0.85 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml b/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml deleted file mode 100644 index f9c5727ea..000000000 --- a/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Olmo 3.1 32B Instruct" -family = "allenai" -release_date = "2026-01-07" -last_updated = "2026-03-15" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.20 -output = 0.60 - -[limit] -context = 65_536 -output = 65_536 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml b/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml deleted file mode 100644 index fb09bf38a..000000000 --- a/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "olmOCR-2-7B-1025" -family = "allenai" -release_date = "2025-10-22" -last_updated = "2025-10-22" -attachment = true -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.09 -output = 0.19 - -[limit] -context = 16_384 -output = 16_384 - 
-[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml deleted file mode 100644 index 8935e609b..000000000 --- a/providers/deepinfra/models/microsoft/phi-4.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Phi-4" -family = "phi" -release_date = "2024-12-11" -last_updated = "2024-12-11" -attachment = false -reasoning = false -temperature = true -knowledge = "2023-10" -tool_call = false -open_weights = true - -[cost] -input = 0.07 -output = 0.14 - -[limit] -context = 16_384 -output = 16_384 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml deleted file mode 100644 index 8b2a91488..000000000 --- a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Nemo Instruct 2407" -family = "mistral-nemo" -release_date = "2024-07-25" -last_updated = "2026-03-17" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.02 -output = 0.04 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml deleted file mode 100644 index 6b518c5b2..000000000 --- a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Small 24B Instruct (2501)" -family = "mistral-small" -release_date = "2025-12-29" -last_updated = "2026-01-10" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.05 -output = 0.08 - -[limit] -context = 32_768 -output = 32_768 - -[modalities] -input = ["text"] 
-output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml deleted file mode 100644 index e800ca8d8..000000000 --- a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Small 3.2 24B Instruct (2506)" -family = "mistral-small" -release_date = "2025-06-20" -last_updated = "2026-03-17" -attachment = true -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.07 -output = 0.20 - -[limit] -context = 128_000 -output = 128_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml b/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml deleted file mode 100644 index 8224aa953..000000000 --- a/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Mixtral 8x7B Instruct v0.1" -family = "mixtral" -release_date = "2023-12-11" -last_updated = "2023-12-11" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-01" -open_weights = true - -[cost] -input = 0.54 -output = 0.54 - -[limit] -context = 32_768 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] From ed7c6dff744c01ab496a5e1997f8f5dfaf0e40fe Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Tue, 7 Apr 2026 09:58:18 -0400 Subject: [PATCH 4/6] Filter out old Google and Llama models --- packages/core/script/generate-deepinfra.ts | 11 +++++++++ .../models/Qwen/Qwen2.5-72B-Instruct.toml | 21 ----------------- .../models/Qwen/Qwen2.5-VL-32B-Instruct.toml | 21 ----------------- .../models/google/gemini-1.5-flash-8b.toml | 22 ------------------ .../models/google/gemini-1.5-flash.toml | 22 ------------------ .../models/google/gemini-2.5-flash.toml | 23 
------------------- .../models/google/gemini-2.5-pro.toml | 23 ------------------- .../models/google/gemma-3-12b-it.toml | 23 ------------------- .../models/google/gemma-3-27b-it.toml | 23 ------------------- .../models/google/gemma-3-4b-it.toml | 22 ------------------ .../meta-llama/Meta-Llama-3-8B-Instruct.toml | 21 ----------------- .../Meta-Llama-3.1-70B-Instruct-Turbo.toml | 22 ------------------ .../Meta-Llama-3.1-70B-Instruct.toml | 21 ----------------- .../Meta-Llama-3.1-8B-Instruct-Turbo.toml | 21 ----------------- .../Meta-Llama-3.1-8B-Instruct.toml | 21 ----------------- .../Llama-3.1-Nemotron-70B-Instruct.toml | 21 ----------------- 16 files changed, 11 insertions(+), 327 deletions(-) delete mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml delete mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml delete mode 100644 providers/deepinfra/models/google/gemini-1.5-flash-8b.toml delete mode 100644 providers/deepinfra/models/google/gemini-1.5-flash.toml delete mode 100644 providers/deepinfra/models/google/gemini-2.5-flash.toml delete mode 100644 providers/deepinfra/models/google/gemini-2.5-pro.toml delete mode 100644 providers/deepinfra/models/google/gemma-3-12b-it.toml delete mode 100644 providers/deepinfra/models/google/gemma-3-27b-it.toml delete mode 100644 providers/deepinfra/models/google/gemma-3-4b-it.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml delete mode 100644 providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml diff --git a/packages/core/script/generate-deepinfra.ts 
b/packages/core/script/generate-deepinfra.ts index af380b1c9..5be5a3855 100644 --- a/packages/core/script/generate-deepinfra.ts +++ b/packages/core/script/generate-deepinfra.ts @@ -38,6 +38,16 @@ const MODEL_REGEX_DENYLIST: RegExp[] = [ /embed/i, /(^|\/)FLUX/i, + + // Old Google models + /gemini-1.5/i, + /gemini-2.5/i, + /gemma-3/i, + + // Old Llama models + /Llama-3-/i, + /Llama-3.1-/i, + /Janus-Pro/i, /p-image/i, @@ -46,6 +56,7 @@ const MODEL_REGEX_DENYLIST: RegExp[] = [ // Qwen 2.5 models are obsolete /Qwen2.5/i, + /Seedream/i, ]; diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml deleted file mode 100644 index 83b19b273..000000000 --- a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Qwen 2.5 72B Instruct" -family = "qwen" -release_date = "2024-09-18" -last_updated = "2025-11-25" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.12 -output = 0.39 - -[limit] -context = 32_768 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml deleted file mode 100644 index f375659e6..000000000 --- a/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Qwen 2.5 VL 32B Instruct" -family = "qwen" -release_date = "2025-03-24" -last_updated = "2025-11-25" -attachment = true -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.20 -output = 0.60 - -[limit] -context = 128_000 -output = 128_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml b/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml deleted file mode 100644 index 465ac3578..000000000 --- 
a/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Gemini 1.5 Flash" -family = "gemini-flash" -release_date = "2024-05-14" -last_updated = "2024-05-14" -attachment = true -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-04" -open_weights = false - -[cost] -input = 0.04 -output = 0.15 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash.toml b/providers/deepinfra/models/google/gemini-1.5-flash.toml deleted file mode 100644 index a9904cb76..000000000 --- a/providers/deepinfra/models/google/gemini-1.5-flash.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "gemini-1.5-flash" -family = "gemini-flash" -release_date = "2024-05-14" -last_updated = "2024-05-14" -attachment = true -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-04" -open_weights = false - -[cost] -input = 0.07 -output = 0.30 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-flash.toml b/providers/deepinfra/models/google/gemini-2.5-flash.toml deleted file mode 100644 index 4867736b7..000000000 --- a/providers/deepinfra/models/google/gemini-2.5-flash.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemini 2.5 Flash" -family = "gemini-flash" -release_date = "2025-03-20" -last_updated = "2025-06-05" -attachment = true -reasoning = true -temperature = true -knowledge = "2025-01" -tool_call = true -structured_output = true -open_weights = false - -[cost] -input = 0.30 -output = 2.50 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-pro.toml b/providers/deepinfra/models/google/gemini-2.5-pro.toml deleted file mode 100644 index 3ad893819..000000000 --- 
a/providers/deepinfra/models/google/gemini-2.5-pro.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemini 2.5 Pro" -family = "gemini-pro" -release_date = "2025-03-20" -last_updated = "2025-06-05" -attachment = true -reasoning = true -temperature = true -knowledge = "2025-01" -tool_call = true -structured_output = true -open_weights = false - -[cost] -input = 1.25 -output = 10.00 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml deleted file mode 100644 index dc03b0c68..000000000 --- a/providers/deepinfra/models/google/gemma-3-12b-it.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemma 3 12B" -family = "gemma" -release_date = "2025-03-13" -last_updated = "2025-03-13" -attachment = true -reasoning = false -temperature = true -knowledge = "2024-10" -tool_call = false -structured_output = true -open_weights = true - -[cost] -input = 0.04 -output = 0.13 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml deleted file mode 100644 index 196f4a12f..000000000 --- a/providers/deepinfra/models/google/gemma-3-27b-it.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemma 3 27B" -family = "gemma" -release_date = "2025-03-12" -last_updated = "2025-03-12" -attachment = true -reasoning = false -temperature = true -knowledge = "2024-10" -tool_call = true -structured_output = true -open_weights = true - -[cost] -input = 0.08 -output = 0.16 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml deleted file mode 100644 index 5b5c634d3..000000000 --- 
a/providers/deepinfra/models/google/gemma-3-4b-it.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Gemma 3 4B" -family = "gemma" -release_date = "2025-03-13" -last_updated = "2025-03-13" -attachment = true -reasoning = false -temperature = true -knowledge = "2024-10" -tool_call = false -open_weights = true - -[cost] -input = 0.04 -output = 0.08 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml deleted file mode 100644 index ccf9dec57..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta-Llama-3-8B-Instruct" -family = "llama" -release_date = "2025-04-03" -last_updated = "2025-04-03" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.03 -output = 0.04 - -[limit] -context = 8_192 -output = 8_192 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml deleted file mode 100644 index 6246f9cad..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Meta-Llama-3.1-70B-Instruct-Turbo" -family = "llama" -release_date = "2024-12-06" -last_updated = "2024-12-06" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2023-12" -open_weights = true - -[cost] -input = 0.40 -output = 0.40 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml deleted file mode 
100644 index 8922b133d..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta-Llama-3.1-70B-Instruct" -family = "llama" -release_date = "2024-04-17" -last_updated = "2024-04-17" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.40 -output = 0.40 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml deleted file mode 100644 index dd4c188c4..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta Llama 3.1 8B Instruct Turbo" -family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.02 -output = 0.03 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml deleted file mode 100644 index 118a03591..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta-Llama-3.1-8B-Instruct" -family = "llama" -release_date = "2025-06-11" -last_updated = "2025-06-11" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.02 -output = 0.05 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml 
b/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml deleted file mode 100644 index beab01a63..000000000 --- a/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Llama 3.1 Nemotron 70B Instruct" -family = "nemotron" -release_date = "2024-10-12" -last_updated = "2024-10-12" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 1.20 -output = 1.20 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] From 19185560f2cc947171b21a02dfa32a638d7e0b7c Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Tue, 7 Apr 2026 10:04:39 -0400 Subject: [PATCH 5/6] Remove obsolete DeepSeek models --- packages/core/script/generate-deepinfra.ts | 4 ++++ .../models/deepseek-ai/DeepSeek-V3-0324.toml | 23 ------------------- .../deepseek-ai/DeepSeek-V3.1-Terminus.toml | 22 ------------------ .../models/deepseek-ai/DeepSeek-V3.1.toml | 22 ------------------ .../models/deepseek-ai/DeepSeek-V3.toml | 22 ------------------ 5 files changed, 4 insertions(+), 89 deletions(-) delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml diff --git a/packages/core/script/generate-deepinfra.ts b/packages/core/script/generate-deepinfra.ts index 5be5a3855..28b89738f 100644 --- a/packages/core/script/generate-deepinfra.ts +++ b/packages/core/script/generate-deepinfra.ts @@ -39,6 +39,10 @@ const MODEL_REGEX_DENYLIST: RegExp[] = [ /(^|\/)FLUX/i, + // Old DeepSeek models + /DeepSeek-V3(?!\.)/i, + /DeepSeek-V3.1/i, + // Old Google models /gemini-1.5/i, /gemini-2.5/i, diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml 
b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml deleted file mode 100644 index c84aaf23b..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "DeepSeek V3 0324" -family = "deepseek" -release_date = "2025-03-24" -last_updated = "2025-03-24" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2025-01" -open_weights = true - -[cost] -input = 0.20 -output = 0.77 -cache_read = 0.14 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml deleted file mode 100644 index 3c808740d..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "DeepSeek V3.1 Terminus" -family = "deepseek" -release_date = "2025-09-22" -last_updated = "2025-10-15" -attachment = false -reasoning = true -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.21 -output = 0.79 -cache_read = 0.13 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml deleted file mode 100644 index 975c04df2..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "DeepSeek V3.1" -family = "deepseek" -release_date = "2025-08-21" -last_updated = "2025-08-21" -attachment = false -reasoning = true -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.21 -output = 0.79 -cache_read = 0.13 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml 
b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml deleted file mode 100644 index b2a43d543..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "DeepSeek-V3" -family = "deepseek" -release_date = "2024-12-26" -last_updated = "2025-01-20" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-07" -open_weights = true - -[cost] -input = 0.32 -output = 0.89 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] From 678f375f9a2230872b10e3b035d00a221a92ac06 Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Wed, 8 Apr 2026 15:20:25 -0400 Subject: [PATCH 6/6] Update with new Gemma 4 & GLM 5.1 models --- .../models/google/gemma-4-26B-A4B-it.toml | 22 ++++++++++++++++ .../models/google/gemma-4-31B-it.toml | 22 ++++++++++++++++ .../moonshotai/Kimi-K2-Instruct-0905.toml | 23 ---------------- .../models/moonshotai/Kimi-K2-Thinking.toml | 26 ------------------- .../deepinfra/models/zai-org/GLM-5.1.toml | 22 ++++++++++++++++ 5 files changed, 66 insertions(+), 49 deletions(-) create mode 100644 providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml create mode 100644 providers/deepinfra/models/google/gemma-4-31B-it.toml delete mode 100644 providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml delete mode 100644 providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml create mode 100644 providers/deepinfra/models/zai-org/GLM-5.1.toml diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml new file mode 100644 index 000000000..93d91acdf --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -0,0 +1,22 @@ +name = "Gemma 4 26B A4B" +family = "gemma" +release_date = "2026-04-02" +last_updated = "2026-04-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] 
+input = 0.08 +output = 0.35 +cache_read = 0.01 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml new file mode 100644 index 000000000..4ea31dfa4 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -0,0 +1,22 @@ +name = "Gemma 4 31B" +family = "gemma" +release_date = "2026-04-02" +last_updated = "2026-04-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.13 +output = 0.38 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml deleted file mode 100644 index ef0c0b0f4..000000000 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Kimi K2 0905" -family = "kimi" -release_date = "2025-09-05" -last_updated = "2025-09-05" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-10" -open_weights = true - -[cost] -input = 0.40 -output = 2.00 -cache_read = 0.15 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml deleted file mode 100644 index 3e89d3257..000000000 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ /dev/null @@ -1,26 +0,0 @@ -name = "Kimi K2 Thinking" -family = "kimi-thinking" -release_date = "2025-11-06" -last_updated = "2025-11-07" -attachment = false -reasoning = true -tool_call = true -temperature = true -knowledge = "2024-10" -open_weights = true - -[interleaved] -field = 
"reasoning_content" - -[cost] -input = 0.47 -output = 2.00 -cache_read = 0.14 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml new file mode 100644 index 000000000..29bebcba6 --- /dev/null +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -0,0 +1,22 @@ +name = "GLM-5.1" +family = "glm" +release_date = "2026-03-27" +last_updated = "2026-03-27" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 1.40 +output = 4.40 +cache_read = 0.26 + +[limit] +context = 202_752 +output = 202_752 + +[modalities] +input = ["text"] +output = ["text"]