tmc · feizhe1996 · May 22, 2026
diff --git a/llms/openai/internal/openaiclient/chat.go b/llms/openai/internal/openaiclient/chat.go
@@ -103,13 +103,15 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) {
 		Alias: (*Alias)(&r),
 	}
 
-	// Handle temperature for reasoning models
-	if isReasoningModel(r.Model) {
-		// Reasoning models (GPT-5, o1, o3) only accept temperature=1 (default)
-		// Omit temperature field to let API use its default value
+	// Handle temperature for reasoning models.
+	// When reasoning is enabled (reasoning_effort is unset or anything other than
+	// "none"), this API only accepts the default temperature, so omit the field.
+	// When reasoning_effort is "none", temperature may be provided explicitly,
+	// including temperature: 0.
+	// https://developers.openai.com/api/docs/guides/latest-model#gpt-54-parameter-compatibility
+	if isReasoningModel(r.Model) && r.ReasoningEffort != "none" {
 		aux.Temperature = nil
 	} else {
-		// For regular models, always send temperature
 		aux.Temperature = &r.Temperature
 	}
 
@@ -131,17 +133,17 @@ func (r ChatRequest) MarshalJSON() ([]byte, error) {
 }
 
 // isReasoningModel returns true if the model is a reasoning model that has temperature constraints.
-// Reasoning models (GPT-5, o1, o3) only accept temperature=1 and reject other values.
+// Reasoning models only accept temperature=1 and reject other values unless reasoning is disabled.
 func isReasoningModel(model string) bool {
-	// o1 series: o1-preview, o1-mini
-	if strings.HasPrefix(model, "o1-") {
+	// o1 series: o1, o1-mini, o1-preview, …
+	if model == "o1" || strings.HasPrefix(model, "o1-") {
 		return true
 	}
-	// o3 series: o3, o3-mini (note: "o3" without suffix is also valid)
+	// o3 series: o3, o3-mini, …
 	if model == "o3" || strings.HasPrefix(model, "o3-") {
 		return true
 	}
-	// GPT-5 series (when released)
+	// GPT-5 series
 	if strings.HasPrefix(model, "gpt-5") {
 		return true
 	}

diff --git a/llms/openai/openaillm.go b/llms/openai/openaillm.go
@@ -27,16 +27,20 @@ const (
 	RoleTool      = "tool"
 )
 
-// ModelCapability defines what a model supports
+// ModelCapability describes what the models whose names match Pattern support.
 type ModelCapability struct {
-	Pattern          string // Regex pattern to match model names
-	SupportsSystem   bool   // If true, supports system messages
-	SupportsThinking bool   // If true, supports reasoning/thinking
-	SupportsCaching  bool   // If true, supports prompt caching
-	// Add more capabilities as needed
+	Pattern                   string   // Regex pattern to match model names
+	SupportsSystem            bool     // If true, supports system messages
+	SupportsThinking          bool     // If true, is a reasoning/thinking model
+	SupportsCaching           bool     // If true, supports prompt caching
+	SupportedReasoningEfforts []string // Valid reasoning_effort values for this model variant; when empty, no reasoning_effort is selected. Support and allowed values differ across model families (e.g. gpt-5.4+ accepts none/low/medium/high/xhigh, earlier o-series do not accept the parameter at all)
 }
 
-// modelCapabilities defines capabilities for different model patterns
+// reasoningEffortsGPT54Plus lists the reasoning_effort values accepted by gpt-5.4 and later; capability entries share this slice, so treat it as read-only.
+var reasoningEffortsGPT54Plus = []string{"none", "low", "medium", "high", "xhigh"}
+
+// modelCapabilities defines capabilities for different model patterns.
+// Patterns are evaluated in order; more specific patterns must appear first.
 var modelCapabilities = []ModelCapability{
 	// OpenAI reasoning models (o1, o3 series) - no system message support
 	{
@@ -45,6 +49,14 @@ var modelCapabilities = []ModelCapability{
 		SupportsThinking: true,
 		SupportsCaching:  false,
 	},
+	// gpt-5.4 and later: supports reasoning_effort
+	{
+		Pattern:                   `(?i)^gpt-5\.[4-9]`,
+		SupportsSystem:            true,
+		SupportsThinking:          true,
+		SupportsCaching:           false,
+		SupportedReasoningEfforts: reasoningEffortsGPT54Plus,
+	},
 	// GPT-4 models
 	{
 		Pattern:          `(?i)^gpt-4`, // Matches gpt-4, gpt-4-turbo, etc.
@@ -59,7 +71,16 @@ var modelCapabilities = []ModelCapability{
 		SupportsThinking: false,
 		SupportsCaching:  false,
 	},
-	// Future models can be added here
+}
+
+// supportsReasoningEffort reports whether effort exactly (case-sensitively) equals one of caps.SupportedReasoningEfforts; it is always false when that list is empty.
+func supportsReasoningEffort(caps ModelCapability, effort string) bool {
+	for _, e := range caps.SupportedReasoningEfforts {
+		if e == effort {
+			return true
+		}
+	}
+	return false
 }
 
 // getModelCapabilities returns the capabilities for a given model
@@ -210,40 +231,41 @@ func (o *LLM) GenerateContent(ctx context.Context, messages []llms.MessageConten
 		}
 	}
 
-	// Extract reasoning effort for thinking models
-	// Note: OpenAI o1/o3 models have built-in reasoning and don't support reasoning_effort parameter
-	// This is kept for future models that might support it (like GPT-5)
+	// Set reasoning_effort for GPT-5 models. The valid set varies by sub-model:
+	//   gpt-5.1: none, low, medium, high
+	//   gpt-5.4: none, low, medium, high, xhigh
+	// Unsupported values are silently omitted rather than sent to the API.
 	var reasoningEffort string
-	// Commented out for now since current o1 models don't support this parameter
-	/*
-		if opts.Metadata != nil {
-			if config, ok := opts.Metadata["thinking_config"].(*llms.ThinkingConfig); ok {
-				// Map thinking mode to reasoning effort
-				switch config.Mode {
-				case llms.ThinkingModeLow:
-					reasoningEffort = "low"
-				case llms.ThinkingModeMedium:
-					reasoningEffort = "medium"
-				case llms.ThinkingModeHigh:
-					reasoningEffort = "high"
-				}
+	if len(modelCaps.SupportedReasoningEfforts) > 0 {
+		if config := llms.GetThinkingConfig(&opts); config != nil {
+			var candidate string
+			switch config.Mode {
+			case llms.ThinkingModeNone:
+				candidate = "none"
+			case llms.ThinkingModeLow:
+				candidate = "low"
+			case llms.ThinkingModeMedium:
+				candidate = "medium"
+			case llms.ThinkingModeHigh:
+				candidate = "high"
+			case llms.ThinkingModeXHigh:
+				candidate = "xhigh"
+			}
+			if supportsReasoningEffort(modelCaps, candidate) {
+				reasoningEffort = candidate
+			}
 
-				// Handle streaming for thinking
-				if config.StreamThinking && opts.StreamingReasoningFunc == nil && opts.StreamingFunc != nil {
-					// Set up default reasoning streaming if requested but not provided
-					// Wrap the single-param streaming func into a reasoning func
-					opts.StreamingReasoningFunc = func(ctx context.Context, reasoningChunk []byte, chunk []byte) error {
-						// For default behavior, we might want to stream both or just the main content
-						// Here we'll just stream the main content chunk
-						if len(chunk) > 0 {
-							return opts.StreamingFunc(ctx, chunk)
-						}
-						return nil
+			if config.StreamThinking && opts.StreamingReasoningFunc == nil && opts.StreamingFunc != nil {
+				streamFn := opts.StreamingFunc
+				opts.StreamingReasoningFunc = func(ctx context.Context, _ []byte, chunk []byte) error {
+					if len(chunk) > 0 {
+						return streamFn(ctx, chunk)
 					}
+					return nil
 				}
 			}
 		}
-	*/
+	}
 
 	// Filter out internal metadata that shouldn't be sent to API
 	apiMetadata := make(map[string]any)

diff --git a/llms/reasoning.go b/llms/reasoning.go
@@ -18,6 +18,10 @@ const (
 	// ThinkingModeHigh allocates maximum tokens for thinking (~80% of max tokens).
 	ThinkingModeHigh ThinkingMode = "high"
 
+	// ThinkingModeXHigh allocates the maximum reasoning budget.
+	// Only supported by gpt-5.1-codex-max and later.
+	ThinkingModeXHigh ThinkingMode = "xhigh"
+
 	// ThinkingModeAuto lets the model decide how much thinking is needed.
 	ThinkingModeAuto ThinkingMode = "auto"
 )