Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions llms/openai/internal/openaiclient/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,15 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) {
Alias: (*Alias)(&r),
}

// Handle temperature for reasoning models
if isReasoningModel(r.Model) {
// Reasoning models (GPT-5, o1, o3) only accept temperature=1 (default)
// Omit temperature field to let API use its default value
// Handle temperature for reasoning models.
// When reasoning is enabled (reasoning_effort != "none"), this API only
// accepts the default temperature behavior, so omit the temperature field.
// When reasoning_effort == "none", temperature may be provided explicitly,
// including temperature: 0.
// https://developers.openai.com/api/docs/guides/latest-model#gpt-54-parameter-compatibility
if isReasoningModel(r.Model) && r.ReasoningEffort != "none" {
aux.Temperature = nil
} else {
// For regular models, always send temperature
aux.Temperature = &r.Temperature
}

Expand All @@ -131,17 +133,17 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) {
}

// isReasoningModel returns true if the model is a reasoning model that has temperature constraints.
// Reasoning models (GPT-5, o1, o3) only accept temperature=1 and reject other values.
// Reasoning models only accept temperature=1 and reject other values unless reasoning is disabled.
func isReasoningModel(model string) bool {
// o1 series: o1-preview, o1-mini
if strings.HasPrefix(model, "o1-") {
// o1 series: o1, o1-mini, o1-preview, …
if model == "o1" || strings.HasPrefix(model, "o1-") {
return true
}
// o3 series: o3, o3-mini (note: "o3" without suffix is also valid)
// o3 series: o3, o3-mini, …
if model == "o3" || strings.HasPrefix(model, "o3-") {
return true
}
// GPT-5 series (when released)
// GPT-5 series
if strings.HasPrefix(model, "gpt-5") {
return true
}
Expand Down
94 changes: 58 additions & 36 deletions llms/openai/openaillm.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,20 @@ const (
RoleTool = "tool"
)

// ModelCapability defines what a model supports
// ModelCapability defines what a model supports.
type ModelCapability struct {
Pattern string // Regex pattern to match model names
SupportsSystem bool // If true, supports system messages
SupportsThinking bool // If true, supports reasoning/thinking
SupportsCaching bool // If true, supports prompt caching
// Add more capabilities as needed
Pattern string // Regex pattern to match model names
SupportsSystem bool // If true, supports system messages
SupportsThinking bool // If true, is a reasoning/thinking model
SupportsCaching bool // If true, supports prompt caching
SupportedReasoningEfforts []string // Valid reasoning_effort values for this model variant. Support and allowed values differ across model families (e.g. gpt-5.4+ accepts none/low/medium/high/xhigh; earlier o-series models do not accept the parameter at all).
}

// modelCapabilities defines capabilities for different model patterns
// reasoningEffortsGPT54Plus is the set of valid reasoning_effort values for gpt-5.4 and later.
var reasoningEffortsGPT54Plus = []string{"none", "low", "medium", "high", "xhigh"}

// modelCapabilities defines capabilities for different model patterns.
// Patterns are evaluated in order; more specific patterns must appear first.
var modelCapabilities = []ModelCapability{
// OpenAI reasoning models (o1, o3 series) - no system message support
{
Expand All @@ -45,6 +49,14 @@ var modelCapabilities = []ModelCapability{
SupportsThinking: true,
SupportsCaching: false,
},
// gpt-5.4 and later: supports reasoning_effort
{
Pattern: `(?i)^gpt-5\.[4-9]`,
SupportsSystem: true,
SupportsThinking: true,
SupportsCaching: false,
SupportedReasoningEfforts: reasoningEffortsGPT54Plus,
},
// GPT-4 models
{
Pattern: `(?i)^gpt-4`, // Matches gpt-4, gpt-4-turbo, etc.
Expand All @@ -59,7 +71,16 @@ var modelCapabilities = []ModelCapability{
SupportsThinking: false,
SupportsCaching: false,
},
// Future models can be added here
}

// supportsReasoningEffort reports whether effort is one of the values listed
// in caps.SupportedReasoningEfforts. The comparison is an exact,
// case-sensitive string match; a capability entry with no listed efforts
// accepts nothing, including the empty string.
func supportsReasoningEffort(caps ModelCapability, effort string) bool {
	allowed := caps.SupportedReasoningEfforts
	matched := false
	// Stop scanning as soon as a match is found.
	for idx := 0; idx < len(allowed) && !matched; idx++ {
		matched = allowed[idx] == effort
	}
	return matched
}

// getModelCapabilities returns the capabilities for a given model
Expand Down Expand Up @@ -210,40 +231,41 @@ func (o *LLM) GenerateContent(ctx context.Context, messages []llms.MessageConten
}
}

// Extract reasoning effort for thinking models
// Note: OpenAI o1/o3 models have built-in reasoning and don't support reasoning_effort parameter
// This is kept for future models that might support it (like GPT-5)
// Set reasoning_effort for GPT-5 models. The valid set varies by sub-model:
// gpt-5.1: none, low, medium, high
// gpt-5.4: none, low, medium, high, xhigh
// Unsupported values are silently omitted rather than sent to the API.
var reasoningEffort string
// Commented out for now since current o1 models don't support this parameter
/*
if opts.Metadata != nil {
if config, ok := opts.Metadata["thinking_config"].(*llms.ThinkingConfig); ok {
// Map thinking mode to reasoning effort
switch config.Mode {
case llms.ThinkingModeLow:
reasoningEffort = "low"
case llms.ThinkingModeMedium:
reasoningEffort = "medium"
case llms.ThinkingModeHigh:
reasoningEffort = "high"
}
if len(modelCaps.SupportedReasoningEfforts) > 0 {
if config := llms.GetThinkingConfig(&opts); config != nil {
var candidate string
switch config.Mode {
case llms.ThinkingModeNone:
candidate = "none"
case llms.ThinkingModeLow:
candidate = "low"
case llms.ThinkingModeMedium:
candidate = "medium"
case llms.ThinkingModeHigh:
candidate = "high"
case llms.ThinkingModeXHigh:
candidate = "xhigh"
}
if supportsReasoningEffort(modelCaps, candidate) {
reasoningEffort = candidate
}

// Handle streaming for thinking
if config.StreamThinking && opts.StreamingReasoningFunc == nil && opts.StreamingFunc != nil {
// Set up default reasoning streaming if requested but not provided
// Wrap the single-param streaming func into a reasoning func
opts.StreamingReasoningFunc = func(ctx context.Context, reasoningChunk []byte, chunk []byte) error {
// For default behavior, we might want to stream both or just the main content
// Here we'll just stream the main content chunk
if len(chunk) > 0 {
return opts.StreamingFunc(ctx, chunk)
}
return nil
if config.StreamThinking && opts.StreamingReasoningFunc == nil && opts.StreamingFunc != nil {
streamFn := opts.StreamingFunc
opts.StreamingReasoningFunc = func(ctx context.Context, _ []byte, chunk []byte) error {
if len(chunk) > 0 {
return streamFn(ctx, chunk)
}
return nil
}
}
}
*/
}

// Filter out internal metadata that shouldn't be sent to API
apiMetadata := make(map[string]any)
Expand Down
4 changes: 4 additions & 0 deletions llms/reasoning.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ const (
// ThinkingModeHigh allocates maximum tokens for thinking (~80% of max tokens).
ThinkingModeHigh ThinkingMode = "high"

// ThinkingModeXHigh allocates the maximum reasoning budget.
// Only supported by gpt-5.1-codex-max and later.
ThinkingModeXHigh ThinkingMode = "xhigh"

// ThinkingModeAuto lets the model decide how much thinking is needed.
ThinkingModeAuto ThinkingMode = "auto"
)
Expand Down
Loading