From ebc1de61203414aea5f7c217871858fb666ec468 Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Mon, 6 Apr 2026 09:42:48 -0400 Subject: [PATCH 1/6] core: Add script to sync DeepInfra models --- packages/core/script/generate-deepinfra.ts | 687 +++++++++++++++++++++ 1 file changed, 687 insertions(+) create mode 100644 packages/core/script/generate-deepinfra.ts diff --git a/packages/core/script/generate-deepinfra.ts b/packages/core/script/generate-deepinfra.ts new file mode 100644 index 000000000..963748285 --- /dev/null +++ b/packages/core/script/generate-deepinfra.ts @@ -0,0 +1,687 @@ +#!/usr/bin/env bun + +/** + * Generates DeepInfra model TOML files from the OpenAI-compatible API. + * + * Flags: + * --dry-run: Preview changes without writing files + * --new-only: Only create new models, skip updating existing ones + * --no-delete: Keep orphaned files instead of deleting them + */ + +import { z } from "zod"; +import path from "node:path"; +import { mkdir, unlink } from "node:fs/promises"; +import { ModelFamilyValues } from "../src/family.js"; + +const API_ENDPOINT = "https://api.deepinfra.com/v1/openai/models"; + +const PROVIDER_DENYLIST: string[] = [ + "BAAI", + "Bria", + "Clarity", + "ClarityAI", + "intfloat", + "sentence-transformers", + "shibing624", + "stabilityai", + "thenlper", + "Wan-AI", +]; + +const MODEL_REGEX_DENYLIST: RegExp[] = [ + /embed/i, + /(^|\/)FLUX/i, + /Seedream/i, + /Janus-Pro/i, + /p-image/i, + /Qwen-Image/i, +]; + +function shouldSkipModel(modelId: string): boolean { + const provider = modelId.split("/")[0]; + if (provider && PROVIDER_DENYLIST.includes(provider)) { + return true; + } + return MODEL_REGEX_DENYLIST.some((pattern) => pattern.test(modelId)); +} + +enum StubbedFields { + ReleaseDate = "release_date", + LastUpdated = "last_updated", +} + +const DeepInfraModel = z + .object({ + id: z.string(), + object: z.string(), + created: z.number(), + owned_by: z.string(), + root: z.string(), + parent: z.string().nullable(), + metadata: z 
+ .object({ + description: z.string().optional(), + context_length: z.number().optional(), + max_tokens: z.number().optional(), + pricing: z + .object({ + input_tokens: z.number().optional(), + output_tokens: z.number().optional(), + cache_read_tokens: z.number().optional(), + cache_write_tokens: z.number().optional(), + }) + .passthrough() + .optional(), + tags: z.array(z.string()).optional(), + }) + .passthrough() + .nullable(), + }) + .passthrough(); + +const DeepInfraResponse = z + .object({ + object: z.string(), + data: z.array(DeepInfraModel), + }) + .passthrough(); + +interface ExistingModel { + name?: string; + family?: string; + attachment?: boolean; + reasoning?: boolean; + tool_call?: boolean; + structured_output?: boolean; + temperature?: boolean; + knowledge?: string; + release_date?: string; + last_updated?: string; + open_weights?: boolean; + interleaved?: boolean | { field: string }; + status?: string; + cost?: { + input?: number; + output?: number; + cache_read?: number; + cache_write?: number; + }; + limit?: { + context?: number; + input?: number; + output?: number; + }; + modalities?: { + input?: string[]; + output?: string[]; + }; +} + +interface MergedModel { + name: string; + family?: string; + attachment: boolean; + reasoning: boolean; + tool_call: boolean; + structured_output?: boolean; + temperature: boolean; + knowledge?: string; + release_date: string; + last_updated: string; + open_weights: boolean; + interleaved?: boolean | { field: string }; + status?: string; + cost?: { + input: number; + output: number; + cache_read?: number; + cache_write?: number; + }; + limit: { + context: number; + input?: number; + output: number; + }; + modalities: { + input: string[]; + output: string[]; + }; +} + +interface Changes { + field: string; + oldValue: string; + newValue: string; +} + +function getTodayDate(): string { + return new Date().toISOString().slice(0, 10); +} + +function formatNumber(n: number): string { + if (n >= 1000) { + return 
n.toString().replace(/\B(?=(\d{3})+(?!\d))/g, "_"); + } + return n.toString(); +} + +function formatCost(n: number): string { + return n.toFixed(2); +} + +function isSubstring(target: string, family: string): boolean { + return target.toLowerCase().includes(family.toLowerCase()); +} + +function matchesFamily(target: string, family: string): boolean { + const targetLower = target.toLowerCase(); + const familyLower = family.toLowerCase(); + let familyIdx = 0; + + for ( + let i = 0; + i < targetLower.length && familyIdx < familyLower.length; + i++ + ) { + if (targetLower[i] === familyLower[familyIdx]) { + familyIdx++; + } + } + + return familyIdx === familyLower.length; +} + +function inferFamily(modelId: string): string | undefined { + const sortedFamilies = [...ModelFamilyValues].sort( + (a, b) => b.length - a.length, + ); + + for (const family of sortedFamilies) { + if (isSubstring(modelId, family)) { + return family; + } + } + + for (const family of sortedFamilies) { + if (matchesFamily(modelId, family)) { + return family; + } + } + + return undefined; +} + +function buildInputModalities( + metadata: z.infer["metadata"], +): string[] { + const mods: string[] = ["text"]; + const tags = new Set(metadata?.tags ?? []); + + if (tags.has("vision") || tags.has("image")) { + mods.push("image"); + } + if (tags.has("file-input")) { + mods.push("pdf"); + } + + return mods; +} + +function buildOutputModalities( + metadata: z.infer["metadata"], +): string[] { + const mods: string[] = ["text"]; + const tags = new Set(metadata?.tags ?? 
[]); + + if (tags.has("image-generation")) { + mods.push("image"); + } + + return mods; +} + +async function loadExistingModel( + filePath: string, +): Promise { + try { + const file = Bun.file(filePath); + if (!(await file.exists())) { + return null; + } + const toml = await import(filePath, { with: { type: "toml" } }).then( + (mod) => mod.default, + ); + return toml as ExistingModel; + } catch (e) { + console.warn(`Warning: Failed to parse existing file ${filePath}:`, e); + return null; + } +} + +function mergeModel( + apiModel: z.infer, + existing: ExistingModel | null, +): MergedModel { + const metadata = apiModel.metadata; + const tags = new Set(metadata?.tags ?? []); + + const modelName = apiModel.id.split("/").pop() || apiModel.id; + + const name = existing?.name ?? modelName; + const attachment = + existing?.attachment ?? (tags.has("vision") || tags.has("file-input")); + const reasoning = existing?.reasoning ?? tags.has("reasoning"); + const toolCall = existing?.tool_call ?? tags.has("tool-use"); + const openWeights = + existing?.open_weights ?? apiModel.owned_by !== "deepinfra"; + const family = existing?.family ?? inferFamily(apiModel.id); + const structuredOutput = existing?.structured_output; + const knowledge = existing?.knowledge; + const interleaved = existing?.interleaved; + const status = existing?.status; + + const releaseDate = existing?.release_date ?? getTodayDate(); + const lastUpdated = existing?.last_updated ?? getTodayDate(); + + const contextLimit = metadata?.context_length ?? 0; + const outputLimit = metadata?.max_tokens ?? 
contextLimit; + + const merged: MergedModel = { + name, + family, + attachment, + reasoning, + tool_call: toolCall, + temperature: true, + release_date: releaseDate, + last_updated: lastUpdated, + open_weights: openWeights, + ...(structuredOutput !== undefined && { + structured_output: structuredOutput, + }), + ...(knowledge && { knowledge }), + ...(interleaved !== undefined && { interleaved }), + ...(status && { status }), + limit: { + context: contextLimit, + ...(contextLimit > outputLimit && { input: contextLimit - outputLimit }), + output: outputLimit, + }, + modalities: { + input: buildInputModalities(metadata), + output: buildOutputModalities(metadata), + }, + }; + + if (metadata?.pricing) { + const pricing = metadata.pricing; + if ( + pricing.input_tokens !== undefined && + pricing.output_tokens !== undefined + ) { + merged.cost = { + input: pricing.input_tokens, + output: pricing.output_tokens, + ...(pricing.cache_read_tokens !== undefined && { + cache_read: pricing.cache_read_tokens, + }), + ...(pricing.cache_write_tokens !== undefined && { + cache_write: pricing.cache_write_tokens, + }), + }; + } + } + + return merged; +} + +function formatToml(model: MergedModel): string { + const lines: string[] = []; + + lines.push(`name = "${model.name.replace(/"/g, '\\"')}"`); + if (model.family) { + lines.push(`family = "${model.family}"`); + } + lines.push(`release_date = "${model.release_date}"`); + lines.push(`last_updated = "${model.last_updated}"`); + lines.push(`attachment = ${model.attachment}`); + lines.push(`reasoning = ${model.reasoning}`); + lines.push(`tool_call = ${model.tool_call}`); + if (model.structured_output !== undefined) { + lines.push(`structured_output = ${model.structured_output}`); + } + lines.push(`temperature = ${model.temperature}`); + if (model.knowledge) { + lines.push(`knowledge = "${model.knowledge}"`); + } + lines.push(`open_weights = ${model.open_weights}`); + if (model.status) { + lines.push(`status = "${model.status}"`); + } + + 
if (model.interleaved !== undefined) { + lines.push(""); + if (model.interleaved === true) { + lines.push(`interleaved = true`); + } else if (typeof model.interleaved === "object") { + lines.push(`[interleaved]`); + lines.push(`field = "${model.interleaved.field}"`); + } + } + + if (model.cost) { + lines.push(""); + lines.push(`[cost]`); + lines.push(`input = ${formatCost(model.cost.input)}`); + lines.push(`output = ${formatCost(model.cost.output)}`); + if (model.cost.cache_read !== undefined) { + lines.push(`cache_read = ${formatCost(model.cost.cache_read)}`); + } + if (model.cost.cache_write !== undefined) { + lines.push(`cache_write = ${formatCost(model.cost.cache_write)}`); + } + } + + lines.push(""); + lines.push(`[limit]`); + lines.push(`context = ${formatNumber(model.limit.context)}`); + if (model.limit.input !== undefined) { + lines.push(`input = ${formatNumber(model.limit.input)}`); + } + lines.push(`output = ${formatNumber(model.limit.output)}`); + + lines.push(""); + lines.push(`[modalities]`); + lines.push( + `input = [${model.modalities.input.map((m) => `"${m}"`).join(", ")}]`, + ); + lines.push( + `output = [${model.modalities.output.map((m) => `"${m}"`).join(", ")}]`, + ); + + return lines.join("\n") + "\n"; +} + +function detectChanges( + existing: ExistingModel | null, + merged: MergedModel, +): Changes[] { + if (!existing) return []; + + const changes: Changes[] = []; + + const shouldSkipStubbed = (field: string): boolean => { + return ( + field === StubbedFields.ReleaseDate || field === StubbedFields.LastUpdated + ); + }; + + const formatValue = (val: unknown, isCost = false): string => { + if (typeof val === "number") { + return isCost ? 
formatCost(val) : formatNumber(val); + } + if (Array.isArray(val)) return `[${val.join(", ")}]`; + if (val === undefined) return "(none)"; + return String(val); + }; + + const isMaterialPriceDiff = ( + oldPrice: unknown, + newPrice: unknown, + ): boolean => { + if (oldPrice === 0 && newPrice === undefined) return false; + + if (oldPrice !== undefined && newPrice !== undefined) { + return (oldPrice as number).toFixed(2) !== (newPrice as number).toFixed(2); + } + + return oldPrice !== newPrice; + }; + + const compare = (field: string, oldVal: unknown, newVal: unknown) => { + if (shouldSkipStubbed(field)) return; + + const isDiff = field.startsWith("cost.") + ? isMaterialPriceDiff(oldVal, newVal) + : JSON.stringify(oldVal) !== JSON.stringify(newVal); + + if (isDiff) { + const isCostField = field.startsWith("cost."); + changes.push({ + field, + oldValue: formatValue(oldVal, isCostField), + newValue: formatValue(newVal, isCostField), + }); + } + }; + + compare("name", existing.name, merged.name); + compare("family", existing.family, merged.family); + compare("attachment", existing.attachment, merged.attachment); + compare("reasoning", existing.reasoning, merged.reasoning); + compare("tool_call", existing.tool_call, merged.tool_call); + compare( + "structured_output", + existing.structured_output, + merged.structured_output, + ); + compare("open_weights", existing.open_weights, merged.open_weights); + compare("release_date", existing.release_date, merged.release_date); + compare("last_updated", existing.last_updated, merged.last_updated); + compare("cost.input", existing.cost?.input, merged.cost?.input); + compare("cost.output", existing.cost?.output, merged.cost?.output); + compare( + "cost.cache_read", + existing.cost?.cache_read, + merged.cost?.cache_read, + ); + compare( + "cost.cache_write", + existing.cost?.cache_write, + merged.cost?.cache_write, + ); + compare("limit.context", existing.limit?.context, merged.limit.context); + compare("limit.input", 
existing.limit?.input, merged.limit.input); + compare("limit.output", existing.limit?.output, merged.limit.output); + compare( + "modalities.input", + existing.modalities?.input, + merged.modalities.input, + ); + compare( + "modalities.output", + existing.modalities?.output, + merged.modalities.output, + ); + + return changes; +} + +async function main() { + const args = process.argv.slice(2); + const dryRun = args.includes("--dry-run"); + const newOnly = args.includes("--new-only"); + const noDelete = args.includes("--no-delete"); + + const modelsDir = path.join( + import.meta.dirname, + "..", + "..", + "..", + "providers", + "deepinfra", + "models", + ); + + console.log( + `${dryRun ? "[DRY RUN] " : ""}${newOnly ? "[NEW ONLY] " : ""}${noDelete ? "[NO DELETE] " : ""}Fetching DeepInfra models from API...`, + ); + + const res = await fetch(API_ENDPOINT); + if (!res.ok) { + console.error(`Failed to fetch API: ${res.status} ${res.statusText}`); + process.exit(1); + } + + const json = await res.json(); + const parsed = DeepInfraResponse.safeParse(json); + if (!parsed.success) { + console.error("Invalid API response:", parsed.error.errors); + process.exit(1); + } + + const apiModels = parsed.data.data; + + const existingFiles = new Set(); + try { + for await (const file of new Bun.Glob("**/*.toml").scan({ + cwd: modelsDir, + absolute: false, + })) { + existingFiles.add(file); + } + } catch {} + + console.log( + `Found ${apiModels.length} models in API, ${existingFiles.size} existing files\n`, + ); + + const apiModelIds = new Set(); + + let created = 0; + let updated = 0; + let unchanged = 0; + let deleted = 0; + let needsManualDates = 0; + + console.log("⚠️ WARNING: DeepInfra API returns incomplete metadata:"); + console.log( + " - created: 0 (stubbed) - release_date/last_updated need manual setting", + ); + console.log( + " - open_weights, reasoning, attachment are inferred from tags", + ); + console.log(" - Please verify these fields manually for new models\n"); + + 
for (const apiModel of apiModels) { + if (shouldSkipModel(apiModel.id)) { + continue; + } + + const relativePath = `${apiModel.id}.toml`; + const filePath = path.join(modelsDir, relativePath); + const dirPath = path.dirname(filePath); + + apiModelIds.add(relativePath); + + const existing = await loadExistingModel(filePath); + const merged = mergeModel(apiModel, existing); + const tomlContent = formatToml(merged); + + if (existing === null) { + created++; + if (apiModel.created === 0) { + needsManualDates++; + } + if (dryRun) { + console.log(`[DRY RUN] Would create: ${relativePath}`); + console.log(` name = "${merged.name}"`); + if (merged.family) { + console.log(` family = "${merged.family}" (inferred)`); + } + if (apiModel.created === 0) { + console.log( + ` ⚠️ release_date = "${merged.release_date}" (stubbed - needs manual update)`, + ); + console.log( + ` ⚠️ last_updated = "${merged.last_updated}" (stubbed - needs manual update)`, + ); + } + console.log(""); + } else { + await mkdir(dirPath, { recursive: true }); + await Bun.write(filePath, tomlContent); + console.log(`Created: ${relativePath}`); + if (apiModel.created === 0) { + console.log( + ` ⚠️ Please manually update release_date and last_updated`, + ); + } + } + } else { + if (newOnly) { + unchanged++; + continue; + } + + const changes = detectChanges(existing, merged); + + if (changes.length > 0) { + updated++; + if (dryRun) { + console.log(`[DRY RUN] Would update: ${relativePath}`); + } else { + await mkdir(dirPath, { recursive: true }); + await Bun.write(filePath, tomlContent); + console.log(`Updated: ${relativePath}`); + } + for (const change of changes) { + console.log( + ` ${change.field}: ${change.oldValue} → ${change.newValue}`, + ); + } + console.log(""); + } else { + unchanged++; + } + } + } + + const orphaned: string[] = []; + for (const file of existingFiles) { + if (!apiModelIds.has(file)) { + orphaned.push(file); + const filePath = path.join(modelsDir, file); + + if (noDelete) { + 
console.log(`Warning: Orphaned file (not in API): ${file}`); + } else if (dryRun) { + console.log(`[DRY RUN] Would delete: ${file}`); + } else { + try { + await unlink(filePath); + deleted++; + console.log(`Deleted: ${file}`); + } catch (e) { + console.warn(`Warning: Failed to delete ${file}:`, e); + } + } + } + } + + console.log(""); + const orphanedInfo = + noDelete && orphaned.length > 0 ? `, ${orphaned.length} orphaned` : ""; + if (dryRun) { + console.log( + `Summary: ${created} would be created (${needsManualDates} with stubbed dates), ${updated} would be updated, ${unchanged} unchanged, ${deleted} would be deleted${orphanedInfo}`, + ); + } else { + console.log( + `Summary: ${created} created (${needsManualDates} with stubbed dates), ${updated} updated, ${unchanged} unchanged, ${deleted} deleted${orphanedInfo}`, + ); + } + + if (created > 0) { + console.log("\n⚠️ IMPORTANT: Please manually review new models for:"); + console.log( + " - release_date and last_updated (API returns stubbed values)", + ); + console.log( + " - open_weights, reasoning, attachment (inferred from tags, may be inaccurate)", + ); + } +} + +await main(); From cb41a65d8f08b602c8b80d25af817ce273e37a55 Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Mon, 6 Apr 2026 09:43:11 -0400 Subject: [PATCH 2/6] deepinfra: Sync DeepInfra models --- .../deepinfra/models/ByteDance/Seed-1.8.toml | 22 ++++++++++++++++ .../models/ByteDance/Seed-2.0-mini.toml | 22 ++++++++++++++++ .../models/ByteDance/Seed-2.0-pro.toml | 22 ++++++++++++++++ .../models/Gryphe/MythoMax-L2-13b.toml | 21 +++++++++++++++ .../models/MiniMaxAI/MiniMax-M2.1.toml | 19 +++++++------- .../models/MiniMaxAI/MiniMax-M2.5.toml | 14 +++++----- .../models/MiniMaxAI/MiniMax-M2.toml | 26 ------------------- .../NousResearch/Hermes-3-Llama-3.1-405B.toml | 21 +++++++++++++++ .../NousResearch/Hermes-3-Llama-3.1-70B.toml | 21 +++++++++++++++ .../PaddlePaddle/PaddleOCR-VL-0.9B.toml | 21 +++++++++++++++ .../models/Qwen/Qwen2.5-72B-Instruct.toml 
| 21 +++++++++++++++ .../models/Qwen/Qwen2.5-VL-32B-Instruct.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3-14B.toml | 21 +++++++++++++++ .../Qwen/Qwen3-235B-A22B-Instruct-2507.toml | 21 +++++++++++++++ .../Qwen/Qwen3-235B-A22B-Thinking-2507.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-30B-A3B.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3-32B.toml | 21 +++++++++++++++ .../Qwen3-Coder-480B-A35B-Instruct-Turbo.toml | 13 +++++----- .../Qwen/Qwen3-Coder-480B-A35B-Instruct.toml | 12 ++++----- .../models/Qwen/Qwen3-Max-Thinking.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3-Max.toml | 22 ++++++++++++++++ .../Qwen/Qwen3-Next-80B-A3B-Instruct.toml | 21 +++++++++++++++ .../Qwen/Qwen3-VL-235B-A22B-Instruct.toml | 22 ++++++++++++++++ .../Qwen/Qwen3-VL-30B-A3B-Instruct.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-0.8B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-122B-A10B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-27B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-2B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-35B-A3B.toml | 22 ++++++++++++++++ .../models/Qwen/Qwen3.5-397B-A17B.toml | 21 +++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-4B.toml | 22 ++++++++++++++++ .../deepinfra/models/Qwen/Qwen3.5-9B.toml | 22 ++++++++++++++++ .../models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 21 +++++++++++++++ .../L3.1-70B-Euryale-v2.2.toml} | 17 ++++++------ .../models/Sao10K/L3.3-70B-Euryale-v2.3.toml | 21 +++++++++++++++ .../models/allenai/Olmo-3.1-32B-Instruct.toml | 21 +++++++++++++++ .../models/allenai/olmOCR-2-7B-1025.toml | 21 +++++++++++++++ .../anthropic/claude-3-7-sonnet-latest.toml | 8 +++--- .../models/anthropic/claude-4-opus.toml | 8 +++--- .../models/anthropic/claude-4-sonnet.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-OCR.toml | 21 +++++++++++++++ .../deepseek-ai/DeepSeek-R1-0528-Turbo.toml | 22 ++++++++++++++++ 
.../models/deepseek-ai/DeepSeek-R1-0528.toml | 9 ++++--- .../DeepSeek-R1-Distill-Llama-70B.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3-0324.toml | 23 ++++++++++++++++ .../deepseek-ai/DeepSeek-V3.1-Terminus.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.1.toml | 22 ++++++++++++++++ .../models/deepseek-ai/DeepSeek-V3.2.toml | 7 ++--- .../models/deepseek-ai/DeepSeek-V3.toml | 22 ++++++++++++++++ .../models/google/gemini-1.5-flash-8b.toml | 22 ++++++++++++++++ .../models/google/gemini-1.5-flash.toml | 22 ++++++++++++++++ .../models/google/gemini-2.5-flash.toml | 23 ++++++++++++++++ .../models/google/gemini-2.5-pro.toml | 23 ++++++++++++++++ .../models/google/gemma-3-12b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-27b-it.toml | 23 ++++++++++++++++ .../models/google/gemma-3-4b-it.toml | 22 ++++++++++++++++ .../Llama-3.2-11B-Vision-Instruct.toml | 22 ++++++++++++++++ .../Llama-3.3-70B-Instruct-Turbo.toml | 3 ++- ...lama-4-Maverick-17B-128E-Instruct-FP8.toml | 6 +++-- .../Llama-4-Scout-17B-16E-Instruct.toml | 8 +++--- .../models/meta-llama/Llama-Guard-4-12B.toml | 22 ++++++++++++++++ .../meta-llama/Meta-Llama-3-8B-Instruct.toml | 21 +++++++++++++++ ...=> Meta-Llama-3.1-70B-Instruct-Turbo.toml} | 10 ++++--- ....toml => Meta-Llama-3.1-70B-Instruct.toml} | 9 ++++--- ... 
=> Meta-Llama-3.1-8B-Instruct-Turbo.toml} | 5 ++-- ...t.toml => Meta-Llama-3.1-8B-Instruct.toml} | 9 ++++--- .../deepinfra/models/microsoft/phi-4.toml | 22 ++++++++++++++++ .../mistralai/Mistral-Nemo-Instruct-2407.toml | 21 +++++++++++++++ .../Mistral-Small-24B-Instruct-2501.toml | 21 +++++++++++++++ .../Mistral-Small-3.2-24B-Instruct-2506.toml | 21 +++++++++++++++ .../mistralai/Mixtral-8x7B-Instruct-v0.1.toml | 22 ++++++++++++++++ .../moonshotai/Kimi-K2-Instruct-0905.toml | 9 +++---- .../models/moonshotai/Kimi-K2-Thinking.toml | 6 ++--- .../models/moonshotai/Kimi-K2.5-Turbo.toml | 23 ++++++++++++++++ .../models/moonshotai/Kimi-K2.5.toml | 18 ++++++------- .../Llama-3.1-Nemotron-70B-Instruct.toml | 21 +++++++++++++++ .../Llama-3.3-Nemotron-Super-49B-v1.5.toml | 21 +++++++++++++++ .../NVIDIA-Nemotron-3-Super-120B-A12B.toml | 23 ++++++++++++++++ .../NVIDIA-Nemotron-Nano-12B-v2-VL.toml | 22 ++++++++++++++++ .../nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml | 22 ++++++++++++++++ .../nvidia/Nemotron-3-Nano-30B-A3B.toml | 22 ++++++++++++++++ .../models/openai/gpt-oss-120b-Turbo.toml | 21 +++++++++++++++ .../deepinfra/models/openai/gpt-oss-120b.toml | 11 +++----- .../deepinfra/models/openai/gpt-oss-20b.toml | 7 ++--- .../models/stepfun-ai/Step-3.5-Flash.toml | 22 ++++++++++++++++ .../deepinfra/models/zai-org/GLM-4.5.toml | 26 ------------------- .../deepinfra/models/zai-org/GLM-4.6.toml | 13 +++++----- .../deepinfra/models/zai-org/GLM-4.6V.toml | 11 ++++---- .../models/zai-org/GLM-4.7-Flash.toml | 11 ++++---- .../deepinfra/models/zai-org/GLM-4.7.toml | 14 +++++----- providers/deepinfra/models/zai-org/GLM-5.toml | 14 +++++----- 91 files changed, 1502 insertions(+), 189 deletions(-) create mode 100644 providers/deepinfra/models/ByteDance/Seed-1.8.toml create mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml create mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml create mode 100644 providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml delete 
mode 100644 providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml create mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml create mode 100644 providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-14B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-32B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Max.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-27B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-2B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-4B.toml create mode 100644 providers/deepinfra/models/Qwen/Qwen3.5-9B.toml create mode 100644 providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml rename providers/deepinfra/models/{moonshotai/Kimi-K2-Instruct.toml => Sao10K/L3.1-70B-Euryale-v2.2.toml} (53%) 
create mode 100644 providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml create mode 100644 providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml create mode 100644 providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml create mode 100644 providers/deepinfra/models/anthropic/claude-4-sonnet.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml create mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml create mode 100644 providers/deepinfra/models/google/gemini-1.5-flash-8b.toml create mode 100644 providers/deepinfra/models/google/gemini-1.5-flash.toml create mode 100644 providers/deepinfra/models/google/gemini-2.5-flash.toml create mode 100644 providers/deepinfra/models/google/gemini-2.5-pro.toml create mode 100644 providers/deepinfra/models/google/gemma-3-12b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-27b-it.toml create mode 100644 providers/deepinfra/models/google/gemma-3-4b-it.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml create mode 100644 providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml create mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml rename providers/deepinfra/models/meta-llama/{Llama-3.1-70B-Instruct.toml => Meta-Llama-3.1-70B-Instruct-Turbo.toml} (56%) rename providers/deepinfra/models/meta-llama/{Llama-3.1-70B-Instruct-Turbo.toml => Meta-Llama-3.1-70B-Instruct.toml} (61%) rename providers/deepinfra/models/meta-llama/{Llama-3.1-8B-Instruct-Turbo.toml => 
Meta-Llama-3.1-8B-Instruct-Turbo.toml} (76%) rename providers/deepinfra/models/meta-llama/{Llama-3.1-8B-Instruct.toml => Meta-Llama-3.1-8B-Instruct.toml} (61%) create mode 100644 providers/deepinfra/models/microsoft/phi-4.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml create mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml create mode 100644 providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml create mode 100644 providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml create mode 100644 providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml create mode 100644 providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml create mode 100644 providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml create mode 100644 providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml create mode 100644 providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml create mode 100644 providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml delete mode 100644 providers/deepinfra/models/zai-org/GLM-4.5.toml diff --git a/providers/deepinfra/models/ByteDance/Seed-1.8.toml b/providers/deepinfra/models/ByteDance/Seed-1.8.toml new file mode 100644 index 000000000..1b2a295ff --- /dev/null +++ b/providers/deepinfra/models/ByteDance/Seed-1.8.toml @@ -0,0 +1,22 @@ +name = "Seed 1.8" +family = "seed" +release_date = "2025-12-18" +last_updated = "2026-02-25" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.25 +output = 2.00 +cache_read = 0.05 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = 
["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml new file mode 100644 index 000000000..dd252063f --- /dev/null +++ b/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml @@ -0,0 +1,22 @@ +name = "Seed-2.0-mini" +family = "seed" +release_date = "2026-02-26" +last_updated = "2026-02-26" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.10 +output = 0.40 +cache_read = 0.02 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml new file mode 100644 index 000000000..6f8c1e024 --- /dev/null +++ b/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml @@ -0,0 +1,22 @@ +name = "Seed-2.0-pro" +family = "seed" +release_date = "2026-02-14" +last_updated = "2026-02-14" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.50 +output = 3.00 +cache_read = 0.10 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml new file mode 100644 index 000000000..473d3ebbc --- /dev/null +++ b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml @@ -0,0 +1,21 @@ +name = "MythoMax 13B" +family = "o" +release_date = "2024-04-25" +last_updated = "2024-04-25" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.40 +output = 0.40 + +[limit] +context = 4_096 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml 
b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml index a0b0ac5d7..c0896240f 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.1.toml @@ -1,25 +1,26 @@ name = "MiniMax M2.1" +family = "minimax" release_date = "2025-12-23" last_updated = "2025-12-23" attachment = false reasoning = true -temperature = true tool_call = true -open_weights = true +temperature = true knowledge = "2025-06" +open_weights = true + +[interleaved] +field = "reasoning_content" [cost] -input = 0.28 -output = 1.20 -cached_read = 0.14 +input = 0.27 +output = 0.95 +cache_read = 0.03 [limit] context = 196_608 output = 196_608 - + [modalities] input = ["text"] output = ["text"] - -[interleaved] -field = "reasoning_content" diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml index bbbdbe470..163cf2b73 100644 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml @@ -1,28 +1,26 @@ -# https://deepinfra.com/MiniMaxAI/MiniMax-M2.5 name = "MiniMax M2.5" family = "minimax" release_date = "2026-02-12" last_updated = "2026-02-12" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true knowledge = "2025-06" open_weights = true +[interleaved] +field = "reasoning_content" + [cost] input = 0.27 output = 0.95 cache_read = 0.03 -cache_write = 0.375 [limit] -context = 204_800 -output = 131_072 +context = 196_608 +output = 196_608 [modalities] input = ["text"] output = ["text"] - -[interleaved] -field = "reasoning_content" diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml deleted file mode 100644 index e726226c7..000000000 --- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.toml +++ /dev/null @@ -1,26 +0,0 @@ -name = "MiniMax M2" -family = "minimax" -release_date = "2025-11-13" -last_updated = 
"2025-11-13" -attachment = false -reasoning = true -temperature = true -tool_call = true -knowledge = "2024-10" -open_weights = true - -[interleaved] -field = "reasoning_content" - -[cost] -input = 0.254 -output = 1.02 -cached_input = 0.127 - -[limit] -context = 262_144 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml new file mode 100644 index 000000000..27aebb6a0 --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml @@ -0,0 +1,21 @@ +name = "Hermes 3 Llama 3.1 405B" +family = "nousresearch" +release_date = "2024-08-16" +last_updated = "2024-08-16" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 1.00 +output = 1.00 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml new file mode 100644 index 000000000..f9a61481e --- /dev/null +++ b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml @@ -0,0 +1,21 @@ +name = "Hermes 3 Llama 3.1 70B" +family = "nousresearch" +release_date = "2024-08-18" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.30 +output = 0.30 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml b/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml new file mode 100644 index 000000000..62a9150db --- /dev/null +++ b/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml @@ -0,0 +1,21 @@ +name = "PaddleOCR-VL-0.9B" +family = "o" 
+release_date = "2025-10-22" +last_updated = "2025-10-22" +attachment = true +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.14 +output = 0.80 + +[limit] +context = 16_384 +output = 16_384 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml new file mode 100644 index 000000000..83b19b273 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 2.5 72B Instruct" +family = "qwen" +release_date = "2024-09-18" +last_updated = "2025-11-25" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.12 +output = 0.39 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml new file mode 100644 index 000000000..f375659e6 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 2.5 VL 32B Instruct" +family = "qwen" +release_date = "2025-03-24" +last_updated = "2025-11-25" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.20 +output = 0.60 + +[limit] +context = 128_000 +output = 128_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-14B.toml b/providers/deepinfra/models/Qwen/Qwen3-14B.toml new file mode 100644 index 000000000..246d8a522 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-14B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 14B" +family = "qwen" +release_date = "2025-04-30" +last_updated = "2025-11-25" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + 
+[cost] +input = 0.12 +output = 0.24 + +[limit] +context = 40_960 +output = 40_960 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml new file mode 100644 index 000000000..bafeb56fd --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 235B A22B Instruct (2507)" +family = "qwen" +release_date = "2025-07-23" +last_updated = "2025-11-25" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.07 +output = 0.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml new file mode 100644 index 000000000..7b5be5295 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 235B A22B Thinking (2507)" +family = "qwen" +release_date = "2025-07-23" +last_updated = "2025-11-25" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.23 +output = 2.30 +cache_read = 0.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml new file mode 100644 index 000000000..ef8796562 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-30B-A3B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 30B A3B" +family = "qwen" +release_date = "2025-04-29" +last_updated = "2025-04-29" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 
40_960 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-32B.toml b/providers/deepinfra/models/Qwen/Qwen3-32B.toml new file mode 100644 index 000000000..a07153996 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-32B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 32B" +family = "qwen" +release_date = "2025-04-30" +last_updated = "2025-11-25" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.08 +output = 0.28 + +[limit] +context = 40_960 +output = 40_960 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index 92f8cb540..6811d8732 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -1,22 +1,23 @@ -name = "Qwen3 Coder 480B A35B Instruct Turbo" +name = "Qwen 3 Coder 480B A35B Instruct Turbo" family = "qwen" release_date = "2025-07-23" last_updated = "2025-07-23" attachment = false reasoning = false +tool_call = true temperature = true knowledge = "2025-04" -tool_call = true open_weights = true [cost] -input = 0.3 -output = 1.2 +input = 0.22 +output = 1.00 +cache_read = 0.02 [limit] context = 262_144 -output = 66_536 +output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml index 08c0ed4ab..6d74e028f 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml @@ -1,22 +1,22 @@ -name = "Qwen3 Coder 480B A35B Instruct" +name = "Qwen 3 Coder 480B A35B Instruct" family = "qwen" 
release_date = "2025-07-23" last_updated = "2025-07-23" attachment = false reasoning = false +tool_call = true temperature = true knowledge = "2025-04" -tool_call = true open_weights = true [cost] -input = 0.4 -output = 1.6 +input = 0.40 +output = 1.60 [limit] context = 262_144 -output = 66_536 +output = 262_144 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml b/providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml new file mode 100644 index 000000000..8cc40b516 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Max-Thinking.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 Max Thinking" +family = "qwen" +release_date = "2026-01-23" +last_updated = "2026-03-15" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 1.20 +output = 6.00 +cache_read = 0.24 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Max.toml b/providers/deepinfra/models/Qwen/Qwen3-Max.toml new file mode 100644 index 000000000..351e14eea --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Max.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 Max" +family = "qwen" +release_date = "2026-04-06" +last_updated = "2026-04-06" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 1.20 +output = 6.00 +cache_read = 0.24 + +[limit] +context = 256_000 +output = 256_000 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml new file mode 100644 index 000000000..97bba055b --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-Next-80B-A3B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 Next 80B A3B Instruct" +family = "qwen" +release_date 
= "2025-09-11" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.09 +output = 1.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml new file mode 100644 index 000000000..4af5e30f5 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-235B-A22B-Instruct.toml @@ -0,0 +1,22 @@ +name = "Qwen 3 VL 235B A22B Instruct" +family = "qwen" +release_date = "2025-09-23" +last_updated = "2026-01-10" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.20 +output = 0.88 +cache_read = 0.11 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml new file mode 100644 index 000000000..2e7e06e19 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3-VL-30B-A3B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Qwen 3 VL 30B A3B Instruct" +family = "qwen" +release_date = "2025-10-05" +last_updated = "2025-11-25" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml new file mode 100644 index 000000000..027ff8129 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-0.8B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 0.8B" +family = "qwen" +release_date = "2026-02-23" +last_updated = "2026-03-26" +attachment = true +reasoning = true 
+tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.01 +output = 0.05 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml new file mode 100644 index 000000000..460fc9b11 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-122B-A10B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 122B A10B" +family = "qwen" +release_date = "2026-02-26" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.29 +output = 2.90 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml new file mode 100644 index 000000000..45d119b1d --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-27B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 27B" +family = "qwen" +release_date = "2026-02-26" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.26 +output = 2.60 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml new file mode 100644 index 000000000..a8e8052af --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-2B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 2B" +family = "qwen" +release_date = "2026-03-02" +last_updated = "2026-03-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.02 +output = 0.10 + +[limit] +context 
= 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml new file mode 100644 index 000000000..cda3af4da --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 35B A3B" +family = "qwen" +release_date = "2026-02-26" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.22 +output = 2.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml new file mode 100644 index 000000000..0b748a722 --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -0,0 +1,21 @@ +name = "Qwen 3.5 397B A17B" +family = "qwen" +release_date = "2026-02-15" +last_updated = "2026-03-15" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.54 +output = 3.40 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml new file mode 100644 index 000000000..adf9c0c4f --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-4B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 4B" +family = "qwen" +release_date = "2026-03-02" +last_updated = "2026-03-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.03 +output = 0.15 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git 
a/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml new file mode 100644 index 000000000..ac1f3500c --- /dev/null +++ b/providers/deepinfra/models/Qwen/Qwen3.5-9B.toml @@ -0,0 +1,22 @@ +name = "Qwen 3.5 9B" +family = "qwen" +release_date = "2026-03-10" +last_updated = "2026-03-15" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2026-01" +open_weights = true + +[cost] +input = 0.04 +output = 0.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml new file mode 100644 index 000000000..2bdebc4f7 --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml @@ -0,0 +1,21 @@ +name = "L3 8B Lunaris v1 Turbo" +family = "o" +release_date = "2024-08-13" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.04 +output = 0.05 + +[limit] +context = 8_192 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml similarity index 53% rename from providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml rename to providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml index 285310a44..7aeb7931b 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct.toml +++ b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml @@ -1,21 +1,20 @@ -name = "Kimi K2" -family = "kimi" -release_date = "2025-07-11" -last_updated = "2025-07-11" +name = "L3.1 70B Euryale v2.2" +family = "o" +release_date = "2024-09-19" +last_updated = "2024-09-19" attachment = false reasoning = false -temperature = true tool_call = true -knowledge = 
"2024-10" +temperature = true open_weights = true [cost] -input = 0.50 -output = 2.00 +input = 0.85 +output = 0.85 [limit] context = 131_072 -output = 32_768 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml b/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml new file mode 100644 index 000000000..daccc1e8e --- /dev/null +++ b/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml @@ -0,0 +1,21 @@ +name = "L3.3 70B Euryale v2.3" +family = "o" +release_date = "2024-12-06" +last_updated = "2024-12-06" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = false + +[cost] +input = 0.85 +output = 0.85 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml b/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml new file mode 100644 index 000000000..f9c5727ea --- /dev/null +++ b/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Olmo 3.1 32B Instruct" +family = "allenai" +release_date = "2026-01-07" +last_updated = "2026-03-15" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.20 +output = 0.60 + +[limit] +context = 65_536 +output = 65_536 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml b/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml new file mode 100644 index 000000000..fb09bf38a --- /dev/null +++ b/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml @@ -0,0 +1,21 @@ +name = "olmOCR-2-7B-1025" +family = "allenai" +release_date = "2025-10-22" +last_updated = "2025-10-22" +attachment = true +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.09 +output = 0.19 + +[limit] +context = 
16_384 +output = 16_384 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml index 28d93a4fb..2aa134f79 100644 --- a/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml +++ b/providers/deepinfra/models/anthropic/claude-3-7-sonnet-latest.toml @@ -4,19 +4,19 @@ release_date = "2025-03-13" last_updated = "2025-03-13" attachment = true reasoning = true +tool_call = true temperature = true knowledge = "2024-10-31" -tool_call = true open_weights = false [cost] -input = 3.3 -output = 16.5 +input = 3.30 +output = 16.50 cache_read = 0.33 [limit] context = 200_000 -output = 64_000 +output = 200_000 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/anthropic/claude-4-opus.toml b/providers/deepinfra/models/anthropic/claude-4-opus.toml index 51f441d78..f481ff1f9 100644 --- a/providers/deepinfra/models/anthropic/claude-4-opus.toml +++ b/providers/deepinfra/models/anthropic/claude-4-opus.toml @@ -4,18 +4,18 @@ release_date = "2025-06-12" last_updated = "2025-06-12" attachment = true reasoning = true +tool_call = true temperature = true knowledge = "2025-03-31" -tool_call = true open_weights = false [cost] -input = 16.5 -output = 82.5 +input = 16.50 +output = 82.50 [limit] context = 200_000 -output = 32_000 +output = 200_000 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/anthropic/claude-4-sonnet.toml b/providers/deepinfra/models/anthropic/claude-4-sonnet.toml new file mode 100644 index 000000000..494f59682 --- /dev/null +++ b/providers/deepinfra/models/anthropic/claude-4-sonnet.toml @@ -0,0 +1,22 @@ +name = "Claude 4 Sonnet" +family = "claude" +release_date = "2025-05-22" +last_updated = "2025-05-22" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2025-03-31" +open_weights = false + +[cost] +input = 3.30 
+output = 16.50 + +[limit] +context = 200_000 +output = 200_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml new file mode 100644 index 000000000..20fd7c6fe --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-OCR.toml @@ -0,0 +1,21 @@ +name = "DeepSeek OCR" +family = "deepseek" +release_date = "2024-12-13" +last_updated = "2025-01-15" +attachment = true +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.03 +output = 0.10 + +[limit] +context = 8_192 +output = 8_192 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml new file mode 100644 index 000000000..455a2b95c --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528-Turbo.toml @@ -0,0 +1,22 @@ +name = "DeepSeek R1 0528 Turbo" +family = "deepseek" +release_date = "2025-05-28" +last_updated = "2025-05-28" +attachment = false +reasoning = true +tool_call = false +temperature = true +knowledge = "2025-03" +open_weights = false + +[cost] +input = 1.00 +output = 3.00 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml index 2733a6938..5546f2193 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml @@ -1,24 +1,25 @@ -name = "DeepSeek-R1-0528" +name = "DeepSeek R1 0528" +family = "deepseek" release_date = "2025-05-28" last_updated = "2025-05-28" attachment = false reasoning = true +tool_call = true temperature = true knowledge = "2024-07" -tool_call = false open_weights = 
false [interleaved] field = "reasoning_content" [cost] -input = 0.5 +input = 0.50 output = 2.15 cache_read = 0.35 [limit] context = 163_840 -output = 64_000 +output = 163_840 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml new file mode 100644 index 000000000..4f3cb97cc --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-Distill-Llama-70B.toml @@ -0,0 +1,22 @@ +name = "DeepSeek R1 Distill Llama 70B" +family = "deepseek" +release_date = "2025-01-20" +last_updated = "2025-01-20" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-07" +open_weights = true + +[cost] +input = 0.70 +output = 0.80 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml new file mode 100644 index 000000000..c84aaf23b --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml @@ -0,0 +1,23 @@ +name = "DeepSeek V3 0324" +family = "deepseek" +release_date = "2025-03-24" +last_updated = "2025-03-24" +attachment = false +reasoning = false +tool_call = true +temperature = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.20 +output = 0.77 +cache_read = 0.14 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml new file mode 100644 index 000000000..3c808740d --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml @@ -0,0 +1,22 @@ +name = "DeepSeek V3.1 Terminus" +family = "deepseek" +release_date = "2025-09-22" +last_updated = "2025-10-15" +attachment 
= false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml new file mode 100644 index 000000000..975c04df2 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml @@ -0,0 +1,22 @@ +name = "DeepSeek V3.1" +family = "deepseek" +release_date = "2025-08-21" +last_updated = "2025-08-21" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.21 +output = 0.79 +cache_read = 0.13 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml index 74d5e7d60..47e3f1085 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.2.toml @@ -1,11 +1,12 @@ -name = "DeepSeek-V3.2" +name = "DeepSeek V3.2" +family = "deepseek" release_date = "2025-12-02" last_updated = "2025-12-02" attachment = false reasoning = true +tool_call = true temperature = true knowledge = "2024-12" -tool_call = true open_weights = false [interleaved] @@ -18,7 +19,7 @@ cache_read = 0.13 [limit] context = 163_840 -output = 64_000 +output = 163_840 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml new file mode 100644 index 000000000..b2a43d543 --- /dev/null +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml @@ -0,0 +1,22 @@ +name = "DeepSeek V3" +family = "deepseek" +release_date = "2024-12-26" +last_updated = "2025-01-20" +attachment = false +reasoning = false 
+tool_call = true +temperature = true +knowledge = "2024-07" +open_weights = true + +[cost] +input = 0.32 +output = 0.89 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml b/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml new file mode 100644 index 000000000..465ac3578 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml @@ -0,0 +1,22 @@ +name = "Gemini 1.5 Flash 8B" +family = "gemini-flash" +release_date = "2024-05-14" +last_updated = "2024-05-14" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2024-04" +open_weights = false + +[cost] +input = 0.04 +output = 0.15 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash.toml b/providers/deepinfra/models/google/gemini-1.5-flash.toml new file mode 100644 index 000000000..a9904cb76 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-1.5-flash.toml @@ -0,0 +1,22 @@ +name = "Gemini 1.5 Flash" +family = "gemini-flash" +release_date = "2024-05-14" +last_updated = "2024-05-14" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2024-04" +open_weights = false + +[cost] +input = 0.07 +output = 0.30 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-flash.toml b/providers/deepinfra/models/google/gemini-2.5-flash.toml new file mode 100644 index 000000000..4867736b7 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-2.5-flash.toml @@ -0,0 +1,23 @@ +name = "Gemini 2.5 Flash" +family = "gemini-flash" +release_date = "2025-03-20" +last_updated = "2025-06-05" +attachment = true +reasoning = true +temperature = true +knowledge = "2025-01" 
+tool_call = true +structured_output = true +open_weights = false + +[cost] +input = 0.30 +output = 2.50 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-pro.toml b/providers/deepinfra/models/google/gemini-2.5-pro.toml new file mode 100644 index 000000000..3ad893819 --- /dev/null +++ b/providers/deepinfra/models/google/gemini-2.5-pro.toml @@ -0,0 +1,23 @@ +name = "Gemini 2.5 Pro" +family = "gemini-pro" +release_date = "2025-03-20" +last_updated = "2025-06-05" +attachment = true +reasoning = true +temperature = true +knowledge = "2025-01" +tool_call = true +structured_output = true +open_weights = false + +[cost] +input = 1.25 +output = 10.00 + +[limit] +context = 1_000_000 +output = 1_000_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml new file mode 100644 index 000000000..dc03b0c68 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-12b-it.toml @@ -0,0 +1,23 @@ +name = "Gemma 3 12B" +family = "gemma" +release_date = "2025-03-13" +last_updated = "2025-03-13" +attachment = true +reasoning = false +temperature = true +knowledge = "2024-10" +tool_call = false +structured_output = true +open_weights = true + +[cost] +input = 0.04 +output = 0.13 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml new file mode 100644 index 000000000..196f4a12f --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-27b-it.toml @@ -0,0 +1,23 @@ +name = "Gemma 3 27B" +family = "gemma" +release_date = "2025-03-12" +last_updated = "2025-03-12" +attachment = true +reasoning = false +temperature = true +knowledge = "2024-10" +tool_call = 
true +structured_output = true +open_weights = true + +[cost] +input = 0.08 +output = 0.16 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml new file mode 100644 index 000000000..5b5c634d3 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-3-4b-it.toml @@ -0,0 +1,22 @@ +name = "Gemma 3 4B" +family = "gemma" +release_date = "2025-03-13" +last_updated = "2025-03-13" +attachment = true +reasoning = false +temperature = true +knowledge = "2024-10" +tool_call = false +open_weights = true + +[cost] +input = 0.04 +output = 0.08 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml new file mode 100644 index 000000000..e5df00a48 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-3.2-11B-Vision-Instruct.toml @@ -0,0 +1,22 @@ +name = "Llama 3.2 11B Vision Instruct" +family = "llama" +release_date = "2024-09-25" +last_updated = "2024-09-25" +attachment = true +reasoning = false +tool_call = true +temperature = true +knowledge = "2023-12" +open_weights = true + +[cost] +input = 0.05 +output = 0.05 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml index df433558b..239dab88f 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml @@ -5,6 +5,7 @@ last_updated = "2024-12-06" attachment = false reasoning = false tool_call = true +temperature = true 
open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.32 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml index 1a2b88bbc..ff9c6f29a 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml @@ -5,6 +5,8 @@ last_updated = "2025-04-05" attachment = false reasoning = false tool_call = true +temperature = true +knowledge = "2024-08" open_weights = true [cost] @@ -12,8 +14,8 @@ input = 0.15 output = 0.60 [limit] -context = 1_000_000 -output = 16_384 +context = 1_048_576 +output = 1_048_576 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index 8f7bab10f..751f9d9d2 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -2,9 +2,11 @@ name = "Llama 4 Scout 17B" family = "llama" release_date = "2025-04-05" last_updated = "2025-04-05" -attachment = false +attachment = true reasoning = false tool_call = true +temperature = true +knowledge = "2024-08" open_weights = true [cost] @@ -12,8 +14,8 @@ input = 0.08 output = 0.30 [limit] -context = 10_000_000 -output = 16_384 +context = 327_680 +output = 327_680 [modalities] input = ["text", "image"] diff --git a/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml new file mode 100644 index 000000000..d911f9f9d --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Llama-Guard-4-12B.toml @@ -0,0 +1,22 @@ +name = "Meta Llama Guard 4 12B" +family = "llama" 
+release_date = "2025-01-01" +last_updated = "2025-01-01" +attachment = false +reasoning = false +temperature = true +tool_call = false +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.18 +output = 0.18 + +[limit] +context = 163_840 +output = 163_840 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml new file mode 100644 index 000000000..ccf9dec57 --- /dev/null +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Meta-Llama-3-8B-Instruct" +family = "llama" +release_date = "2025-04-03" +last_updated = "2025-04-03" +attachment = false +reasoning = false +tool_call = false +temperature = true +open_weights = true + +[cost] +input = 0.03 +output = 0.04 + +[limit] +context = 8_192 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml similarity index 56% rename from providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml index 2edbcb221..6246f9cad 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml @@ -1,10 +1,12 @@ -name = "Llama 3.1 70B" +name = "Meta-Llama-3.1-70B-Instruct-Turbo" family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" +release_date = "2024-12-06" +last_updated = "2024-12-06" attachment = false reasoning = false tool_call = true +temperature = true +knowledge = "2023-12" open_weights = true [cost] @@ -13,7 +15,7 @@ output = 0.40 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git 
a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml similarity index 61% rename from providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml index 79e674844..8922b133d 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml @@ -1,10 +1,11 @@ -name = "Llama 3.1 70B Turbo" +name = "Meta-Llama-3.1-70B-Instruct" family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" +release_date = "2024-04-17" +last_updated = "2024-04-17" attachment = false reasoning = false tool_call = true +temperature = true open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.40 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml similarity index 76% rename from providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml index 3646a2d47..dd4c188c4 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml @@ -1,10 +1,11 @@ -name = "Llama 3.1 8B Turbo" +name = "Meta Llama 3.1 8B Instruct Turbo" family = "llama" release_date = "2024-07-23" last_updated = "2024-07-23" attachment = false reasoning = false tool_call = true +temperature = true open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.03 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml 
b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml similarity index 61% rename from providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml rename to providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml index ec3539991..118a03591 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.1-8B-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml @@ -1,10 +1,11 @@ -name = "Llama 3.1 8B" +name = "Meta-Llama-3.1-8B-Instruct" family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" +release_date = "2025-06-11" +last_updated = "2025-06-11" attachment = false reasoning = false tool_call = true +temperature = true open_weights = true [cost] @@ -13,7 +14,7 @@ output = 0.05 [limit] context = 131_072 -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml new file mode 100644 index 000000000..8935e609b --- /dev/null +++ b/providers/deepinfra/models/microsoft/phi-4.toml @@ -0,0 +1,22 @@ +name = "Phi-4" +family = "phi" +release_date = "2024-12-11" +last_updated = "2024-12-11" +attachment = false +reasoning = false +temperature = true +knowledge = "2023-10" +tool_call = false +open_weights = true + +[cost] +input = 0.07 +output = 0.14 + +[limit] +context = 16_384 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml new file mode 100644 index 000000000..8b2a91488 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml @@ -0,0 +1,21 @@ +name = "Mistral Nemo Instruct 2407" +family = "mistral-nemo" +release_date = "2024-07-25" +last_updated = "2026-03-17" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] 
+input = 0.02 +output = 0.04 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml new file mode 100644 index 000000000..6b518c5b2 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml @@ -0,0 +1,21 @@ +name = "Mistral Small 24B Instruct (2501)" +family = "mistral-small" +release_date = "2025-12-29" +last_updated = "2026-01-10" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.05 +output = 0.08 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml new file mode 100644 index 000000000..e800ca8d8 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml @@ -0,0 +1,21 @@ +name = "Mistral Small 3.2 24B Instruct (2506)" +family = "mistral-small" +release_date = "2025-06-20" +last_updated = "2026-03-17" +attachment = true +reasoning = false +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.07 +output = 0.20 + +[limit] +context = 128_000 +output = 128_000 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml b/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml new file mode 100644 index 000000000..8224aa953 --- /dev/null +++ b/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml @@ -0,0 +1,22 @@ +name = "Mixtral 8x7B Instruct v0.1" +family = "mixtral" +release_date = "2023-12-11" +last_updated = "2023-12-11" +attachment = false +reasoning = false +tool_call 
= true +temperature = true +knowledge = "2024-01" +open_weights = true + +[cost] +input = 0.54 +output = 0.54 + +[limit] +context = 32_768 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml index 180430bf1..ef0c0b0f4 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml @@ -1,12 +1,11 @@ -# https://deepinfra.com/moonshotai/Kimi-K2-Instruct-0905 name = "Kimi K2 0905" family = "kimi" release_date = "2025-09-05" last_updated = "2025-09-05" attachment = false reasoning = false -temperature = true tool_call = true +temperature = true knowledge = "2024-10" open_weights = true @@ -16,9 +15,9 @@ output = 2.00 cache_read = 0.15 [limit] -context = 262_144 -output = 262_144 +context = 131_072 +output = 131_072 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml index b455e5568..3e89d3257 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml @@ -4,8 +4,8 @@ release_date = "2025-11-06" last_updated = "2025-11-07" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true knowledge = "2024-10" open_weights = true @@ -15,11 +15,11 @@ field = "reasoning_content" [cost] input = 0.47 output = 2.00 -cached_input = 0.141 +cache_read = 0.14 [limit] context = 131_072 -output = 32_768 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml new file mode 100644 index 000000000..3d9573493 --- /dev/null +++ 
b/providers/deepinfra/models/moonshotai/Kimi-K2.5-Turbo.toml @@ -0,0 +1,23 @@ +name = "Kimi K2.5 Turbo" +family = "kimi" +release_date = "2026-01-27" +last_updated = "2026-01-27" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.60 +output = 3.00 +cache_read = 0.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index 84183d853..c8c951a03 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -4,24 +4,24 @@ release_date = "2026-01-27" last_updated = "2026-01-27" attachment = true reasoning = true +tool_call = true structured_output = true temperature = true -tool_call = true knowledge = "2025-01" open_weights = true +[interleaved] +field = "reasoning_content" + [cost] -input = 0.50 -output = 2.80 -cached_input = 0.09 +input = 0.45 +output = 2.25 +cache_read = 0.07 [limit] context = 262_144 -output = 32_768 +output = 262_144 [modalities] -input = ["text", "image", "video"] +input = ["text", "image"] output = ["text"] - -[interleaved] -field = "reasoning_content" \ No newline at end of file diff --git a/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml b/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml new file mode 100644 index 000000000..beab01a63 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml @@ -0,0 +1,21 @@ +name = "Llama 3.1 Nemotron 70B Instruct" +family = "nemotron" +release_date = "2024-10-12" +last_updated = "2024-10-12" +attachment = false +reasoning = false +tool_call = true +temperature = true +open_weights = false + +[cost] +input = 1.20 +output = 1.20 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] 
+output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml new file mode 100644 index 000000000..b7ebafde8 --- /dev/null +++ b/providers/deepinfra/models/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5.toml @@ -0,0 +1,21 @@ +name = "Llama 3.3 Nemotron Super 49b V1.5" +family = "nemotron" +release_date = "2025-03-16" +last_updated = "2025-03-16" +attachment = false +reasoning = true +tool_call = false +temperature = true +open_weights = false + +[cost] +input = 0.10 +output = 0.40 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml new file mode 100644 index 000000000..35cc44e85 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B.toml @@ -0,0 +1,23 @@ +name = "Nemotron 3 Super 120B A12B" +family = "nemotron" +release_date = "2026-03-11" +last_updated = "2026-03-11" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-04" +open_weights = true + +[cost] +input = 0.10 +output = 0.50 +cache_read = 0.10 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml new file mode 100644 index 000000000..ca27dc226 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL.toml @@ -0,0 +1,22 @@ +name = "NVIDIA-Nemotron-Nano-12B-v2-VL" +family = "nemotron" +release_date = "2025-03-15" +last_updated = "2026-02-04" +attachment = true +reasoning = true +tool_call = true +temperature = true +knowledge = "2025-01" +open_weights = true + +[cost] +input = 0.20 +output = 
0.60 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml new file mode 100644 index 000000000..560d065d3 --- /dev/null +++ b/providers/deepinfra/models/nvidia/NVIDIA-Nemotron-Nano-9B-v2.toml @@ -0,0 +1,22 @@ +name = "Nemotron Nano 9B V2" +family = "nemotron" +release_date = "2025-08-18" +last_updated = "2025-08-18" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-09" +open_weights = true + +[cost] +input = 0.04 +output = 0.16 + +[limit] +context = 131_072 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml new file mode 100644 index 000000000..af802fa8b --- /dev/null +++ b/providers/deepinfra/models/nvidia/Nemotron-3-Nano-30B-A3B.toml @@ -0,0 +1,22 @@ +name = "Nemotron 3 Nano 30B A3B" +family = "nemotron" +release_date = "2024-12" +last_updated = "2024-12" +attachment = false +reasoning = true +tool_call = true +temperature = true +knowledge = "2024-09" +open_weights = true + +[cost] +input = 0.05 +output = 0.20 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml new file mode 100644 index 000000000..854e2e75b --- /dev/null +++ b/providers/deepinfra/models/openai/gpt-oss-120b-Turbo.toml @@ -0,0 +1,21 @@ +name = "GPT OSS 120B Turbo" +family = "gpt-oss" +release_date = "2025-08-05" +last_updated = "2025-08-05" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.15 +output = 0.60 + +[limit] +context = 131_072 +output = 
131_072 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 0889e79c2..0ecf093ef 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -1,23 +1,20 @@ -# https://deepinfra.com/openai/gpt-oss-120b - name = "GPT OSS 120B" family = "gpt-oss" release_date = "2025-08-05" last_updated = "2025-08-05" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true open_weights = true [cost] -input = 0.05 -output = 0.24 +input = 0.04 +output = 0.19 [limit] context = 131_072 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/openai/gpt-oss-20b.toml b/providers/deepinfra/models/openai/gpt-oss-20b.toml index 9342fee14..4f84265ea 100644 --- a/providers/deepinfra/models/openai/gpt-oss-20b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-20b.toml @@ -1,13 +1,11 @@ -# https://deepinfra.com/openai/gpt-oss-20b - name = "GPT OSS 20B" family = "gpt-oss" release_date = "2025-08-05" last_updated = "2025-08-05" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true open_weights = true [cost] @@ -16,8 +14,7 @@ output = 0.14 [limit] context = 131_072 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml new file mode 100644 index 000000000..4e5ca5d74 --- /dev/null +++ b/providers/deepinfra/models/stepfun-ai/Step-3.5-Flash.toml @@ -0,0 +1,22 @@ +name = "Step-3.5-Flash" +family = "step" +release_date = "2026-04-06" +last_updated = "2026-04-06" +attachment = false +reasoning = false +tool_call = false +temperature = true 
+open_weights = false + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.5.toml b/providers/deepinfra/models/zai-org/GLM-4.5.toml deleted file mode 100644 index ae5ae7ec1..000000000 --- a/providers/deepinfra/models/zai-org/GLM-4.5.toml +++ /dev/null @@ -1,26 +0,0 @@ -name = "GLM-4.5" -family = "glm" -release_date = "2025-07-28" -last_updated = "2025-07-28" -attachment = false -reasoning = false -temperature = true -tool_call = true -knowledge = "2025-04" -open_weights = true - -# https://deepinfra.com/zai-org/GLM-4.5 -# It is now being redirected to GLM-4.6 -status = "deprecated" - -[cost] -input = 0.60 -output = 2.20 - -[limit] -context = 131_072 -output = 98_304 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml index 63eb5b3f7..693cb68ad 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6.toml @@ -1,13 +1,12 @@ -# https://deepinfra.com/zai-org/GLM-4.6 -name = "GLM-4.6" +name = "GLM 4.6" family = "glm" release_date = "2025-09-30" last_updated = "2025-09-30" -knowledge = "2025-04" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true [interleaved] @@ -19,9 +18,9 @@ output = 1.74 cache_read = 0.08 [limit] -context = 204_800 -output = 131_072 +context = 202_752 +output = 202_752 [modalities] input = ["text"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.6V.toml b/providers/deepinfra/models/zai-org/GLM-4.6V.toml index b3f6ce288..d91baba63 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6V.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6V.toml @@ -1,13 +1,12 @@ -# 
https://deepinfra.com/zai-org/GLM-4.6V -name = "GLM-4.6V" +name = "GLM 4.6V" family = "glm" release_date = "2025-09-30" last_updated = "2025-09-30" -knowledge = "2025-04" attachment = true reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true [interleaved] @@ -18,9 +17,9 @@ input = 0.30 output = 0.90 [limit] -context = 204_800 +context = 131_072 output = 131_072 [modalities] input = ["text", "image"] -output = ["text"] \ No newline at end of file +output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index dbb42a2d8..0be4ef395 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -1,13 +1,12 @@ -# https://deepinfra.com/zai-org/GLM-4.7-Flash -name = "GLM-4.7-Flash" +name = "GLM 4.7 Flash" family = "glm-flash" release_date = "2026-01-19" last_updated = "2026-01-19" -knowledge = "2025-04" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true [interleaved] @@ -16,11 +15,11 @@ field = "reasoning_content" [cost] input = 0.06 output = 0.40 +cache_read = 0.01 [limit] context = 202_752 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +output = 202_752 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 76b66a0c5..ffc25c089 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -1,27 +1,25 @@ -# https://deepinfra.com/zai-org/GLM-4.7 -name = "GLM-4.7" +name = "GLM 4.7" family = "glm" release_date = "2025-12-22" last_updated = "2025-12-22" -knowledge = "2025-04" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-04" open_weights = true 
[interleaved] field = "reasoning_content" [cost] -input = 0.43 +input = 0.40 output = 1.75 cache_read = 0.08 [limit] -context = 202_752 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +context = 202_752 +output = 202_752 [modalities] input = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index 257b2c673..5bcd66e73 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -1,27 +1,25 @@ -# https://deepinfra.com/zai-org/GLM-5 -name = "GLM-5" +name = "GLM 5" family = "glm" release_date = "2026-02-12" last_updated = "2026-02-12" -knowledge = "2025-12" attachment = false reasoning = true -temperature = true tool_call = true +temperature = true +knowledge = "2025-12" open_weights = true [interleaved] field = "reasoning_content" [cost] -input = 0.8 +input = 0.80 output = 2.56 cache_read = 0.16 [limit] -context = 202_752 -# https://deepinfra.com/docs/advanced/max_tokens_limit -output = 16_384 +context = 202_752 +output = 202_752 [modalities] input = ["text"] From a7a4bd301dc6908f66ece8e8d45ada37562bc8ca Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Tue, 7 Apr 2026 09:48:14 -0400 Subject: [PATCH 3/6] Be more selective about models --- packages/core/script/generate-deepinfra.ts | 43 +++++++++++++------ .../deepinfra/models/ByteDance/Seed-1.8.toml | 22 ---------- .../models/ByteDance/Seed-2.0-mini.toml | 22 ---------- .../models/ByteDance/Seed-2.0-pro.toml | 22 ---------- .../models/Gryphe/MythoMax-L2-13b.toml | 21 --------- .../NousResearch/Hermes-3-Llama-3.1-405B.toml | 21 --------- .../NousResearch/Hermes-3-Llama-3.1-70B.toml | 21 --------- .../PaddlePaddle/PaddleOCR-VL-0.9B.toml | 21 --------- .../models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml | 21 --------- .../models/Sao10K/L3.1-70B-Euryale-v2.2.toml | 21 --------- .../models/Sao10K/L3.3-70B-Euryale-v2.3.toml | 21 --------- .../models/allenai/Olmo-3.1-32B-Instruct.toml | 
21 --------- .../models/allenai/olmOCR-2-7B-1025.toml | 21 --------- .../deepinfra/models/microsoft/phi-4.toml | 22 ---------- .../mistralai/Mistral-Nemo-Instruct-2407.toml | 21 --------- .../Mistral-Small-24B-Instruct-2501.toml | 21 --------- .../Mistral-Small-3.2-24B-Instruct-2506.toml | 21 --------- .../mistralai/Mixtral-8x7B-Instruct-v0.1.toml | 22 ---------- 18 files changed, 29 insertions(+), 376 deletions(-) delete mode 100644 providers/deepinfra/models/ByteDance/Seed-1.8.toml delete mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml delete mode 100644 providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml delete mode 100644 providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml delete mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml delete mode 100644 providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml delete mode 100644 providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml delete mode 100644 providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml delete mode 100644 providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml delete mode 100644 providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml delete mode 100644 providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml delete mode 100644 providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml delete mode 100644 providers/deepinfra/models/microsoft/phi-4.toml delete mode 100644 providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml delete mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml delete mode 100644 providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml delete mode 100644 providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml diff --git a/packages/core/script/generate-deepinfra.ts b/packages/core/script/generate-deepinfra.ts index 963748285..af380b1c9 100644 --- a/packages/core/script/generate-deepinfra.ts +++ 
b/packages/core/script/generate-deepinfra.ts @@ -16,33 +16,46 @@ import { ModelFamilyValues } from "../src/family.js"; const API_ENDPOINT = "https://api.deepinfra.com/v1/openai/models"; -const PROVIDER_DENYLIST: string[] = [ - "BAAI", - "Bria", - "Clarity", - "ClarityAI", - "intfloat", - "sentence-transformers", - "shibing624", - "stabilityai", - "thenlper", - "Wan-AI", +// Allowlist of providers to include - only these providers will be tracked +// This is intentionally restrictive since most models shouldn't be included +const PROVIDER_ALLOWLIST: string[] = [ + "anthropic", + "deepseek-ai", + "google", + "meta-llama", + "MiniMaxAI", + "moonshotai", + "nvidia", + "openai", + "Qwen", + "stepfun-ai", + "zai-org", ]; +// Models/patterns to skip even from allowed providers (embeddings, image gen, etc.) const MODEL_REGEX_DENYLIST: RegExp[] = [ + // Avoid most embedding models /embed/i, + /(^|\/)FLUX/i, - /Seedream/i, /Janus-Pro/i, /p-image/i, + + // Avoid any Qwen image generation models /Qwen-Image/i, + + // Qwen 2.5 models are obsolete + /Qwen2.5/i, + /Seedream/i, ]; function shouldSkipModel(modelId: string): boolean { const provider = modelId.split("/")[0]; - if (provider && PROVIDER_DENYLIST.includes(provider)) { + // Skip if provider is not in the allowlist + if (!provider || !PROVIDER_ALLOWLIST.includes(provider)) { return true; } + // Also skip models matching excluded patterns (embeddings, image gen, etc.) 
return MODEL_REGEX_DENYLIST.some((pattern) => pattern.test(modelId)); } @@ -433,7 +446,9 @@ function detectChanges( if (oldPrice === 0 && newPrice === undefined) return false; if (oldPrice !== undefined && newPrice !== undefined) { - return (oldPrice as number).toFixed(2) !== (newPrice as number).toFixed(2); + return ( + (oldPrice as number).toFixed(2) !== (newPrice as number).toFixed(2) + ); } return oldPrice !== newPrice; diff --git a/providers/deepinfra/models/ByteDance/Seed-1.8.toml b/providers/deepinfra/models/ByteDance/Seed-1.8.toml deleted file mode 100644 index 1b2a295ff..000000000 --- a/providers/deepinfra/models/ByteDance/Seed-1.8.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Seed 1.8" -family = "seed" -release_date = "2025-12-18" -last_updated = "2026-02-25" -attachment = true -reasoning = true -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 0.25 -output = 2.00 -cache_read = 0.05 - -[limit] -context = 256_000 -output = 256_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml deleted file mode 100644 index dd252063f..000000000 --- a/providers/deepinfra/models/ByteDance/Seed-2.0-mini.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Seed-2.0-mini" -family = "seed" -release_date = "2026-02-26" -last_updated = "2026-02-26" -attachment = true -reasoning = true -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 0.10 -output = 0.40 -cache_read = 0.02 - -[limit] -context = 256_000 -output = 256_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml b/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml deleted file mode 100644 index 6f8c1e024..000000000 --- a/providers/deepinfra/models/ByteDance/Seed-2.0-pro.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Seed-2.0-pro" -family = "seed" 
-release_date = "2026-02-14" -last_updated = "2026-02-14" -attachment = true -reasoning = true -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 0.50 -output = 3.00 -cache_read = 0.10 - -[limit] -context = 256_000 -output = 256_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml b/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml deleted file mode 100644 index 473d3ebbc..000000000 --- a/providers/deepinfra/models/Gryphe/MythoMax-L2-13b.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "MythoMax 13B" -family = "o" -release_date = "2024-04-25" -last_updated = "2024-04-25" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.40 -output = 0.40 - -[limit] -context = 4_096 -output = 4_096 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml deleted file mode 100644 index 27aebb6a0..000000000 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-405B.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Hermes 3 Llama 3.1 405B" -family = "nousresearch" -release_date = "2024-08-16" -last_updated = "2024-08-16" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 1.00 -output = 1.00 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml b/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml deleted file mode 100644 index f9a61481e..000000000 --- a/providers/deepinfra/models/NousResearch/Hermes-3-Llama-3.1-70B.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Hermes 3 Llama 3.1 70B" -family = "nousresearch" -release_date = "2024-08-18" -last_updated = 
"2026-03-15" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.30 -output = 0.30 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml b/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml deleted file mode 100644 index 62a9150db..000000000 --- a/providers/deepinfra/models/PaddlePaddle/PaddleOCR-VL-0.9B.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "PaddleOCR-VL-0.9B" -family = "o" -release_date = "2025-10-22" -last_updated = "2025-10-22" -attachment = true -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.14 -output = 0.80 - -[limit] -context = 16_384 -output = 16_384 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml b/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml deleted file mode 100644 index 2bdebc4f7..000000000 --- a/providers/deepinfra/models/Sao10K/L3-8B-Lunaris-v1-Turbo.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "L3 8B Lunaris v1 Turbo" -family = "o" -release_date = "2024-08-13" -last_updated = "2026-03-15" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.04 -output = 0.05 - -[limit] -context = 8_192 -output = 8_192 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml b/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml deleted file mode 100644 index 7aeb7931b..000000000 --- a/providers/deepinfra/models/Sao10K/L3.1-70B-Euryale-v2.2.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "L3.1 70B Euryale v2.2" -family = "o" -release_date = "2024-09-19" -last_updated = "2024-09-19" -attachment = false -reasoning = false -tool_call = true -temperature = true 
-open_weights = true - -[cost] -input = 0.85 -output = 0.85 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml b/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml deleted file mode 100644 index daccc1e8e..000000000 --- a/providers/deepinfra/models/Sao10K/L3.3-70B-Euryale-v2.3.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "L3.3 70B Euryale v2.3" -family = "o" -release_date = "2024-12-06" -last_updated = "2024-12-06" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = false - -[cost] -input = 0.85 -output = 0.85 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml b/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml deleted file mode 100644 index f9c5727ea..000000000 --- a/providers/deepinfra/models/allenai/Olmo-3.1-32B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Olmo 3.1 32B Instruct" -family = "allenai" -release_date = "2026-01-07" -last_updated = "2026-03-15" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.20 -output = 0.60 - -[limit] -context = 65_536 -output = 65_536 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml b/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml deleted file mode 100644 index fb09bf38a..000000000 --- a/providers/deepinfra/models/allenai/olmOCR-2-7B-1025.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "olmOCR-2-7B-1025" -family = "allenai" -release_date = "2025-10-22" -last_updated = "2025-10-22" -attachment = true -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.09 -output = 0.19 - -[limit] -context = 16_384 -output = 16_384 - 
-[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/microsoft/phi-4.toml b/providers/deepinfra/models/microsoft/phi-4.toml deleted file mode 100644 index 8935e609b..000000000 --- a/providers/deepinfra/models/microsoft/phi-4.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Phi-4" -family = "phi" -release_date = "2024-12-11" -last_updated = "2024-12-11" -attachment = false -reasoning = false -temperature = true -knowledge = "2023-10" -tool_call = false -open_weights = true - -[cost] -input = 0.07 -output = 0.14 - -[limit] -context = 16_384 -output = 16_384 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml b/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml deleted file mode 100644 index 8b2a91488..000000000 --- a/providers/deepinfra/models/mistralai/Mistral-Nemo-Instruct-2407.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Nemo Instruct 2407" -family = "mistral-nemo" -release_date = "2024-07-25" -last_updated = "2026-03-17" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.02 -output = 0.04 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml b/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml deleted file mode 100644 index 6b518c5b2..000000000 --- a/providers/deepinfra/models/mistralai/Mistral-Small-24B-Instruct-2501.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Small 24B Instruct (2501)" -family = "mistral-small" -release_date = "2025-12-29" -last_updated = "2026-01-10" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.05 -output = 0.08 - -[limit] -context = 32_768 -output = 32_768 - -[modalities] -input = ["text"] 
-output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml b/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml deleted file mode 100644 index e800ca8d8..000000000 --- a/providers/deepinfra/models/mistralai/Mistral-Small-3.2-24B-Instruct-2506.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Mistral Small 3.2 24B Instruct (2506)" -family = "mistral-small" -release_date = "2025-06-20" -last_updated = "2026-03-17" -attachment = true -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.07 -output = 0.20 - -[limit] -context = 128_000 -output = 128_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml b/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml deleted file mode 100644 index 8224aa953..000000000 --- a/providers/deepinfra/models/mistralai/Mixtral-8x7B-Instruct-v0.1.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Mixtral 8x7B Instruct v0.1" -family = "mixtral" -release_date = "2023-12-11" -last_updated = "2023-12-11" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-01" -open_weights = true - -[cost] -input = 0.54 -output = 0.54 - -[limit] -context = 32_768 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] From ed7c6dff744c01ab496a5e1997f8f5dfaf0e40fe Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Tue, 7 Apr 2026 09:58:18 -0400 Subject: [PATCH 4/6] Filter out old Google and Llama models --- packages/core/script/generate-deepinfra.ts | 11 +++++++++ .../models/Qwen/Qwen2.5-72B-Instruct.toml | 21 ----------------- .../models/Qwen/Qwen2.5-VL-32B-Instruct.toml | 21 ----------------- .../models/google/gemini-1.5-flash-8b.toml | 22 ------------------ .../models/google/gemini-1.5-flash.toml | 22 ------------------ .../models/google/gemini-2.5-flash.toml | 23 
------------------- .../models/google/gemini-2.5-pro.toml | 23 ------------------- .../models/google/gemma-3-12b-it.toml | 23 ------------------- .../models/google/gemma-3-27b-it.toml | 23 ------------------- .../models/google/gemma-3-4b-it.toml | 22 ------------------ .../meta-llama/Meta-Llama-3-8B-Instruct.toml | 21 ----------------- .../Meta-Llama-3.1-70B-Instruct-Turbo.toml | 22 ------------------ .../Meta-Llama-3.1-70B-Instruct.toml | 21 ----------------- .../Meta-Llama-3.1-8B-Instruct-Turbo.toml | 21 ----------------- .../Meta-Llama-3.1-8B-Instruct.toml | 21 ----------------- .../Llama-3.1-Nemotron-70B-Instruct.toml | 21 ----------------- 16 files changed, 11 insertions(+), 327 deletions(-) delete mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml delete mode 100644 providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml delete mode 100644 providers/deepinfra/models/google/gemini-1.5-flash-8b.toml delete mode 100644 providers/deepinfra/models/google/gemini-1.5-flash.toml delete mode 100644 providers/deepinfra/models/google/gemini-2.5-flash.toml delete mode 100644 providers/deepinfra/models/google/gemini-2.5-pro.toml delete mode 100644 providers/deepinfra/models/google/gemma-3-12b-it.toml delete mode 100644 providers/deepinfra/models/google/gemma-3-27b-it.toml delete mode 100644 providers/deepinfra/models/google/gemma-3-4b-it.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml delete mode 100644 providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml delete mode 100644 providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml diff --git a/packages/core/script/generate-deepinfra.ts 
b/packages/core/script/generate-deepinfra.ts index af380b1c9..5be5a3855 100644 --- a/packages/core/script/generate-deepinfra.ts +++ b/packages/core/script/generate-deepinfra.ts @@ -38,6 +38,16 @@ const MODEL_REGEX_DENYLIST: RegExp[] = [ /embed/i, /(^|\/)FLUX/i, + + // Old Google models + /gemini-1.5/i, + /gemini-2.5/i, + /gemma-3/i, + + // Old Llama models + /Llama-3-/i, + /Llama-3.1-/i, + /Janus-Pro/i, /p-image/i, @@ -46,6 +56,7 @@ const MODEL_REGEX_DENYLIST: RegExp[] = [ // Qwen 2.5 models are obsolete /Qwen2.5/i, + /Seedream/i, ]; diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml deleted file mode 100644 index 83b19b273..000000000 --- a/providers/deepinfra/models/Qwen/Qwen2.5-72B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Qwen 2.5 72B Instruct" -family = "qwen" -release_date = "2024-09-18" -last_updated = "2025-11-25" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.12 -output = 0.39 - -[limit] -context = 32_768 -output = 32_768 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml b/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml deleted file mode 100644 index f375659e6..000000000 --- a/providers/deepinfra/models/Qwen/Qwen2.5-VL-32B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Qwen 2.5 VL 32B Instruct" -family = "qwen" -release_date = "2025-03-24" -last_updated = "2025-11-25" -attachment = true -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.20 -output = 0.60 - -[limit] -context = 128_000 -output = 128_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml b/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml deleted file mode 100644 index 465ac3578..000000000 --- 
a/providers/deepinfra/models/google/gemini-1.5-flash-8b.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Gemini 1.5 Flash" -family = "gemini-flash" -release_date = "2024-05-14" -last_updated = "2024-05-14" -attachment = true -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-04" -open_weights = false - -[cost] -input = 0.04 -output = 0.15 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-1.5-flash.toml b/providers/deepinfra/models/google/gemini-1.5-flash.toml deleted file mode 100644 index a9904cb76..000000000 --- a/providers/deepinfra/models/google/gemini-1.5-flash.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "gemini-1.5-flash" -family = "gemini-flash" -release_date = "2024-05-14" -last_updated = "2024-05-14" -attachment = true -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-04" -open_weights = false - -[cost] -input = 0.07 -output = 0.30 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-flash.toml b/providers/deepinfra/models/google/gemini-2.5-flash.toml deleted file mode 100644 index 4867736b7..000000000 --- a/providers/deepinfra/models/google/gemini-2.5-flash.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemini 2.5 Flash" -family = "gemini-flash" -release_date = "2025-03-20" -last_updated = "2025-06-05" -attachment = true -reasoning = true -temperature = true -knowledge = "2025-01" -tool_call = true -structured_output = true -open_weights = false - -[cost] -input = 0.30 -output = 2.50 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemini-2.5-pro.toml b/providers/deepinfra/models/google/gemini-2.5-pro.toml deleted file mode 100644 index 3ad893819..000000000 --- 
a/providers/deepinfra/models/google/gemini-2.5-pro.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemini 2.5 Pro" -family = "gemini-pro" -release_date = "2025-03-20" -last_updated = "2025-06-05" -attachment = true -reasoning = true -temperature = true -knowledge = "2025-01" -tool_call = true -structured_output = true -open_weights = false - -[cost] -input = 1.25 -output = 10.00 - -[limit] -context = 1_000_000 -output = 1_000_000 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-12b-it.toml b/providers/deepinfra/models/google/gemma-3-12b-it.toml deleted file mode 100644 index dc03b0c68..000000000 --- a/providers/deepinfra/models/google/gemma-3-12b-it.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemma 3 12B" -family = "gemma" -release_date = "2025-03-13" -last_updated = "2025-03-13" -attachment = true -reasoning = false -temperature = true -knowledge = "2024-10" -tool_call = false -structured_output = true -open_weights = true - -[cost] -input = 0.04 -output = 0.13 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-27b-it.toml b/providers/deepinfra/models/google/gemma-3-27b-it.toml deleted file mode 100644 index 196f4a12f..000000000 --- a/providers/deepinfra/models/google/gemma-3-27b-it.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Gemma 3 27B" -family = "gemma" -release_date = "2025-03-12" -last_updated = "2025-03-12" -attachment = true -reasoning = false -temperature = true -knowledge = "2024-10" -tool_call = true -structured_output = true -open_weights = true - -[cost] -input = 0.08 -output = 0.16 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-3-4b-it.toml b/providers/deepinfra/models/google/gemma-3-4b-it.toml deleted file mode 100644 index 5b5c634d3..000000000 --- 
a/providers/deepinfra/models/google/gemma-3-4b-it.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Gemma 3 4B" -family = "gemma" -release_date = "2025-03-13" -last_updated = "2025-03-13" -attachment = true -reasoning = false -temperature = true -knowledge = "2024-10" -tool_call = false -open_weights = true - -[cost] -input = 0.04 -output = 0.08 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text", "image"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml deleted file mode 100644 index ccf9dec57..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3-8B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta-Llama-3-8B-Instruct" -family = "llama" -release_date = "2025-04-03" -last_updated = "2025-04-03" -attachment = false -reasoning = false -tool_call = false -temperature = true -open_weights = true - -[cost] -input = 0.03 -output = 0.04 - -[limit] -context = 8_192 -output = 8_192 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml deleted file mode 100644 index 6246f9cad..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "Meta-Llama-3.1-70B-Instruct-Turbo" -family = "llama" -release_date = "2024-12-06" -last_updated = "2024-12-06" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2023-12" -open_weights = true - -[cost] -input = 0.40 -output = 0.40 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml deleted file mode 
100644 index 8922b133d..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-70B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta-Llama-3.1-70B-Instruct" -family = "llama" -release_date = "2024-04-17" -last_updated = "2024-04-17" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.40 -output = 0.40 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml deleted file mode 100644 index dd4c188c4..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta Llama 3.1 8B Instruct Turbo" -family = "llama" -release_date = "2024-07-23" -last_updated = "2024-07-23" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.02 -output = 0.03 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml b/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml deleted file mode 100644 index 118a03591..000000000 --- a/providers/deepinfra/models/meta-llama/Meta-Llama-3.1-8B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Meta-Llama-3.1-8B-Instruct" -family = "llama" -release_date = "2025-06-11" -last_updated = "2025-06-11" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.02 -output = 0.05 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml 
b/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml deleted file mode 100644 index beab01a63..000000000 --- a/providers/deepinfra/models/nvidia/Llama-3.1-Nemotron-70B-Instruct.toml +++ /dev/null @@ -1,21 +0,0 @@ -name = "Llama 3.1 Nemotron 70B Instruct" -family = "nemotron" -release_date = "2024-10-12" -last_updated = "2024-10-12" -attachment = false -reasoning = false -tool_call = true -temperature = true -open_weights = false - -[cost] -input = 1.20 -output = 1.20 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] From 19185560f2cc947171b21a02dfa32a638d7e0b7c Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Tue, 7 Apr 2026 10:04:39 -0400 Subject: [PATCH 5/6] Remove obsolete DeepSeek models --- packages/core/script/generate-deepinfra.ts | 4 ++++ .../models/deepseek-ai/DeepSeek-V3-0324.toml | 23 ------------------- .../deepseek-ai/DeepSeek-V3.1-Terminus.toml | 22 ------------------ .../models/deepseek-ai/DeepSeek-V3.1.toml | 22 ------------------ .../models/deepseek-ai/DeepSeek-V3.toml | 22 ------------------ 5 files changed, 4 insertions(+), 89 deletions(-) delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml delete mode 100644 providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml diff --git a/packages/core/script/generate-deepinfra.ts b/packages/core/script/generate-deepinfra.ts index 5be5a3855..28b89738f 100644 --- a/packages/core/script/generate-deepinfra.ts +++ b/packages/core/script/generate-deepinfra.ts @@ -39,6 +39,10 @@ const MODEL_REGEX_DENYLIST: RegExp[] = [ /(^|\/)FLUX/i, + // Old DeepSeek models + /DeepSeek-V3(?!\.)/i, + /DeepSeek-V3.1/i, + // Old Google models /gemini-1.5/i, /gemini-2.5/i, diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml 
b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml deleted file mode 100644 index c84aaf23b..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3-0324.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "DeepSeek V3 0324" -family = "deepseek" -release_date = "2025-03-24" -last_updated = "2025-03-24" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2025-01" -open_weights = true - -[cost] -input = 0.20 -output = 0.77 -cache_read = 0.14 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml deleted file mode 100644 index 3c808740d..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1-Terminus.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "DeepSeek V3.1 Terminus" -family = "deepseek" -release_date = "2025-09-22" -last_updated = "2025-10-15" -attachment = false -reasoning = true -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.21 -output = 0.79 -cache_read = 0.13 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml deleted file mode 100644 index 975c04df2..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.1.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "DeepSeek V3.1" -family = "deepseek" -release_date = "2025-08-21" -last_updated = "2025-08-21" -attachment = false -reasoning = true -tool_call = true -temperature = true -open_weights = true - -[cost] -input = 0.21 -output = 0.79 -cache_read = 0.13 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml 
b/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml deleted file mode 100644 index b2a43d543..000000000 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V3.toml +++ /dev/null @@ -1,22 +0,0 @@ -name = "DeepSeek-V3" -family = "deepseek" -release_date = "2024-12-26" -last_updated = "2025-01-20" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-07" -open_weights = true - -[cost] -input = 0.32 -output = 0.89 - -[limit] -context = 163_840 -output = 163_840 - -[modalities] -input = ["text"] -output = ["text"] From 678f375f9a2230872b10e3b035d00a221a92ac06 Mon Sep 17 00:00:00 2001 From: Christopher Tam Date: Wed, 8 Apr 2026 15:20:25 -0400 Subject: [PATCH 6/6] Update with new Gemma 4 & GLM 5.1 models --- .../models/google/gemma-4-26B-A4B-it.toml | 22 ++++++++++++++++ .../models/google/gemma-4-31B-it.toml | 22 ++++++++++++++++ .../moonshotai/Kimi-K2-Instruct-0905.toml | 23 ---------------- .../models/moonshotai/Kimi-K2-Thinking.toml | 26 ------------------- .../deepinfra/models/zai-org/GLM-5.1.toml | 22 ++++++++++++++++ 5 files changed, 66 insertions(+), 49 deletions(-) create mode 100644 providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml create mode 100644 providers/deepinfra/models/google/gemma-4-31B-it.toml delete mode 100644 providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml delete mode 100644 providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml create mode 100644 providers/deepinfra/models/zai-org/GLM-5.1.toml diff --git a/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml new file mode 100644 index 000000000..93d91acdf --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-26B-A4B-it.toml @@ -0,0 +1,22 @@ +name = "Gemma 4 26B A4B" +family = "gemma" +release_date = "2026-04-02" +last_updated = "2026-04-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] 
+input = 0.08 +output = 0.35 +cache_read = 0.01 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/google/gemma-4-31B-it.toml b/providers/deepinfra/models/google/gemma-4-31B-it.toml new file mode 100644 index 000000000..4ea31dfa4 --- /dev/null +++ b/providers/deepinfra/models/google/gemma-4-31B-it.toml @@ -0,0 +1,22 @@ +name = "Gemma 4 31B" +family = "gemma" +release_date = "2026-04-02" +last_updated = "2026-04-02" +attachment = true +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 0.13 +output = 0.38 +cache_read = 0.02 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml deleted file mode 100644 index ef0c0b0f4..000000000 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Instruct-0905.toml +++ /dev/null @@ -1,23 +0,0 @@ -name = "Kimi K2 0905" -family = "kimi" -release_date = "2025-09-05" -last_updated = "2025-09-05" -attachment = false -reasoning = false -tool_call = true -temperature = true -knowledge = "2024-10" -open_weights = true - -[cost] -input = 0.40 -output = 2.00 -cache_read = 0.15 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml b/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml deleted file mode 100644 index 3e89d3257..000000000 --- a/providers/deepinfra/models/moonshotai/Kimi-K2-Thinking.toml +++ /dev/null @@ -1,26 +0,0 @@ -name = "Kimi K2 Thinking" -family = "kimi-thinking" -release_date = "2025-11-06" -last_updated = "2025-11-07" -attachment = false -reasoning = true -tool_call = true -temperature = true -knowledge = "2024-10" -open_weights = true - -[interleaved] -field = 
"reasoning_content" - -[cost] -input = 0.47 -output = 2.00 -cache_read = 0.14 - -[limit] -context = 131_072 -output = 131_072 - -[modalities] -input = ["text"] -output = ["text"] diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml new file mode 100644 index 000000000..29bebcba6 --- /dev/null +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -0,0 +1,22 @@ +name = "GLM-5.1" +family = "glm" +release_date = "2026-03-27" +last_updated = "2026-03-27" +attachment = false +reasoning = true +tool_call = true +temperature = true +open_weights = true + +[cost] +input = 1.40 +output = 4.40 +cache_read = 0.26 + +[limit] +context = 202_752 +output = 202_752 + +[modalities] +input = ["text"] +output = ["text"]