diff --git a/evals/registry/solvers/minimax.yaml b/evals/registry/solvers/minimax.yaml
new file mode 100644
index 0000000000..fec62edb9e
--- /dev/null
+++ b/evals/registry/solvers/minimax.yaml
@@ -0,0 +1,74 @@
+# Registry entries for MiniMax models served through MiniMaxSolver.
+# Each model gets a direct solver and a chain-of-thought (CoT) solver.
+# temperature is 1 because MiniMaxSolver documents the accepted range as
+# (0.0, 1.0]; a temperature of 0 is not allowed.
+
+# ------------------
+# MiniMax-M2.7
+# ------------------
+
+generation/direct/MiniMax-M2.7:
+  class: evals.solvers.providers.minimax.minimax_solver:MiniMaxSolver
+  args:
+    completion_fn_options:
+      model: MiniMax-M2.7
+      extra_options:
+        temperature: 1
+        max_tokens: 512
+    # Anchored so the highspeed direct solver below reuses the same list.
+    postprocessors: &postprocessors
+      - evals.solvers.postprocessors.postprocessors:Strip
+
+generation/cot/MiniMax-M2.7:
+  class: evals.solvers.nested.cot_solver:CoTSolver
+  args:
+    cot_solver:
+      class: evals.solvers.providers.minimax.minimax_solver:MiniMaxSolver
+      args:
+        completion_fn_options:
+          model: MiniMax-M2.7
+          extra_options:
+            temperature: 1
+            max_tokens: 512
+    extract_solver:
+      class: evals.solvers.providers.minimax.minimax_solver:MiniMaxSolver
+      args:
+        completion_fn_options:
+          model: MiniMax-M2.7
+          extra_options:
+            temperature: 1
+            max_tokens: 512
+
+# ------------------
+# MiniMax-M2.7-highspeed
+# ------------------
+
+generation/direct/MiniMax-M2.7-highspeed:
+  class: evals.solvers.providers.minimax.minimax_solver:MiniMaxSolver
+  args:
+    completion_fn_options:
+      model: MiniMax-M2.7-highspeed
+      extra_options:
+        temperature: 1
+        max_tokens: 512
+    postprocessors: *postprocessors
+
+generation/cot/MiniMax-M2.7-highspeed:
+  class: evals.solvers.nested.cot_solver:CoTSolver
+  args:
+    cot_solver:
+      class: evals.solvers.providers.minimax.minimax_solver:MiniMaxSolver
+      args:
+        completion_fn_options:
+          model: MiniMax-M2.7-highspeed
+          extra_options:
+            temperature: 1
+            max_tokens: 512
+    extract_solver:
+      class: evals.solvers.providers.minimax.minimax_solver:MiniMaxSolver
+      args:
+        completion_fn_options:
+          model: MiniMax-M2.7-highspeed
+          extra_options:
+            temperature: 1
+            max_tokens: 512
diff --git a/evals/solvers/providers/minimax/minimax_solver.py b/evals/solvers/providers/minimax/minimax_solver.py
new file mode 100644
index 0000000000..821694a8b2
--- /dev/null
+++ b/evals/solvers/providers/minimax/minimax_solver.py
@@ -0,0 +1,118 @@
+import logging
+import os
+from typing import Optional
+
+from openai import BadRequestError
+
+from evals.solvers.providers.openai.openai_solver import OpenAISolver
+from evals.solvers.solver import SolverResult
+
+# Model names accepted by MiniMaxSolver.
+MINIMAX_CHAT_MODELS = {
+    "MiniMax-M2.7",
+    "MiniMax-M2.7-highspeed",
+}
+
+
+def is_chat_model(model: str) -> bool:
+    """
+    Returns True if `model` is one of MINIMAX_CHAT_MODELS.
+
+    Raises NotImplementedError for every other model name instead of returning
+    False, because MiniMaxSolver only supports the listed chat models.
+    """
+    if model in MINIMAX_CHAT_MODELS:
+        return True
+    raise NotImplementedError(f"Model {model} not currently supported by MiniMaxSolver")
+
+
+class MiniMaxSolver(OpenAISolver):
+    """
+    A solver class for the MiniMax API via the OpenAI python SDK completion functions.
+    Leveraging the OpenAISolver class, with some overrides.
+
+    Specifically we override:
+    - `_api_base` to point to the MiniMax API (OpenAI-compatible endpoint)
+    - `_api_key` to use the MINIMAX_API_KEY environment variable
+    - `_completion_exception` to use BadRequestError (raised for context length issues)
+    - `_is_chat_model` to use MiniMax's supported chat models
+    - `_preprocess_completion_fn_options` to not perform any completion fn options preprocessing
+    - `_perform_prechecks` to not perform any checks before calling the API
+    - `_handle_completion_exception` to turn prompt-too-long errors into a SolverResult
+
+    MiniMax API constraints:
+    - temperature must be in (0.0, 1.0], cannot be 0
+    - response_format is not supported
+    - valid_answers (logit_bias) is not supported
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        if self.valid_answers is not None:
+            raise NotImplementedError("`valid_answers` not supported by MiniMaxSolver")
+
+    @property
+    def _api_base(self) -> Optional[str]:
+        """The base URL for the API; MINIMAX_BASE_URL overrides the default endpoint"""
+        return os.environ.get("MINIMAX_BASE_URL", "https://api.minimax.io/v1")
+
+    @property
+    def _api_key(self) -> Optional[str]:
+        """The API key to use for the API; None when MINIMAX_API_KEY is unset"""
+        return os.environ.get("MINIMAX_API_KEY")
+
+    @property
+    def _completion_exception(self) -> type[Exception]:
+        """
+        Overrides OpenAISolver implementation;
+        MiniMax API uses BadRequestError for context length issues
+        """
+        return BadRequestError
+
+    def _is_chat_model(self, model: str) -> bool:
+        """
+        Overrides OpenAISolver implementation;
+        Need to use a different set of chat models
+        """
+        return is_chat_model(model)
+
+    def _preprocess_completion_fn_options(self) -> None:
+        """
+        Overrides OpenAISolver implementation; Here we do not perform any completion fn
+        options preprocessing since the MiniMaxSolver does not support the
+        `valid_answers` parameter (logit_bias)
+        """
+        return None
+
+    def _perform_prechecks(self, msgs: list[dict[str, str]]) -> Optional[SolverResult]:
+        """
+        Overrides OpenAISolver implementation; Here we do not perform any prechecks
+        since the MiniMaxSolver does not support context length checks due to the lack
+        of a tokenizer in tiktoken for MiniMax models.
+        """
+        return None
+
+    def _handle_completion_exception(self, e: Exception) -> SolverResult:
+        """
+        Handles any expected exceptions from the MiniMax API completion function.
+
+        Errors that report an over-long prompt are converted into a SolverResult
+        carrying the error message; any other exception is re-raised.
+        """
+        # Read the optional attributes defensively so that an exception lacking them
+        # is re-raised as itself instead of being masked by an AttributeError.
+        raw_message = getattr(e, "message", None)
+        message = raw_message if isinstance(raw_message, str) else str(e)
+
+        if getattr(e, "code", None) == "context_length_exceeded":
+            reason = "MiniMax API context length exceeded"
+        elif isinstance(raw_message, str) and (
+            "Please reduce your prompt" in raw_message
+            or "'$.messages' is too long" in raw_message
+        ):
+            reason = "MiniMax API error"
+        else:
+            raise e
+
+        logging.warning(f"{reason}, using error message as solver response: {message}")
+        return SolverResult(message, error=getattr(e, "body", None))
diff --git a/evals/solvers/providers/minimax/minimax_solver_test.py b/evals/solvers/providers/minimax/minimax_solver_test.py
new file mode 100644
index 0000000000..c7844913a2
--- /dev/null
+++ b/evals/solvers/providers/minimax/minimax_solver_test.py
@@ -0,0 +1,119 @@
+import os
+from unittest.mock import patch
+
+import pytest
+from openai import BadRequestError
+
+from evals.solvers.providers.minimax.minimax_solver import (
+    MINIMAX_CHAT_MODELS,
+    MiniMaxSolver,
+    is_chat_model,
+)
+from evals.solvers.solver import SolverResult
+
+
+class FakeAPIError(Exception):
+    """Stand-in exposing the attributes the solver reads from API errors."""
+
+    def __init__(self, message, code=None, body=None):
+        super().__init__(message)
+        self.message = message
+        self.code = code
+        self.body = body
+
+
+class TestIsChatModel:
+    def test_m27_is_chat_model(self):
+        assert is_chat_model("MiniMax-M2.7") is True
+
+    def test_m27_highspeed_is_chat_model(self):
+        assert is_chat_model("MiniMax-M2.7-highspeed") is True
+
+    def test_unknown_model_raises(self):
+        with pytest.raises(NotImplementedError, match="not currently supported"):
+            is_chat_model("unknown-model")
+
+    def test_all_models_are_chat_models(self):
+        for model in MINIMAX_CHAT_MODELS:
+            assert is_chat_model(model) is True
+
+
+class TestMiniMaxSolverProperties:
+    @pytest.fixture
+    def solver(self):
+        return MiniMaxSolver(
+            completion_fn_options={
+                "model": "MiniMax-M2.7",
+                "extra_options": {"temperature": 1, "max_tokens": 512},
+            },
+        )
+
+    @pytest.fixture
+    def solver_highspeed(self):
+        return MiniMaxSolver(
+            completion_fn_options={
+                "model": "MiniMax-M2.7-highspeed",
+                "extra_options": {"temperature": 1, "max_tokens": 512},
+            },
+        )
+
+    def test_default_api_base(self, solver, monkeypatch):
+        # Drop any MINIMAX_BASE_URL from the caller's shell so the default is tested.
+        monkeypatch.delenv("MINIMAX_BASE_URL", raising=False)
+        assert solver._api_base == "https://api.minimax.io/v1"
+
+    def test_custom_api_base(self, solver):
+        with patch.dict(os.environ, {"MINIMAX_BASE_URL": "https://custom.api.io/v1"}):
+            assert solver._api_base == "https://custom.api.io/v1"
+
+    def test_api_key_from_env(self, solver):
+        with patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"}):
+            assert solver._api_key == "test-key-123"
+
+    def test_api_key_none_when_unset(self, solver):
+        with patch.dict(os.environ, {}, clear=True):
+            assert solver._api_key is None
+
+    def test_model_name(self, solver):
+        assert solver.model == "MiniMax-M2.7"
+
+    def test_highspeed_model_name(self, solver_highspeed):
+        assert solver_highspeed.model == "MiniMax-M2.7-highspeed"
+
+    def test_valid_answers_raises(self):
+        with pytest.raises(NotImplementedError, match="valid_answers"):
+            MiniMaxSolver(
+                completion_fn_options={
+                    "model": "MiniMax-M2.7",
+                    "extra_options": {"temperature": 1},
+                },
+                valid_answers=["yes", "no"],
+            )
+
+    def test_prechecks_returns_none(self, solver):
+        msgs = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello!"},
+        ]
+        assert solver._perform_prechecks(msgs) is None
+
+    def test_preprocess_completion_fn_options_does_nothing(self, solver):
+        assert solver._preprocess_completion_fn_options() is None
+
+    def test_completion_exception_is_bad_request_error(self, solver):
+        assert solver._completion_exception is BadRequestError
+
+    def test_is_chat_model_method(self, solver):
+        assert solver._is_chat_model("MiniMax-M2.7") is True
+
+    def test_context_length_error_becomes_solver_result(self, solver):
+        error = FakeAPIError("prompt too long", code="context_length_exceeded")
+        assert isinstance(solver._handle_completion_exception(error), SolverResult)
+
+    def test_reduce_prompt_error_becomes_solver_result(self, solver):
+        error = FakeAPIError("Please reduce your prompt and try again")
+        assert isinstance(solver._handle_completion_exception(error), SolverResult)
+
+    def test_unexpected_exception_is_reraised(self, solver):
+        with pytest.raises(ValueError, match="boom"):
+            solver._handle_completion_exception(ValueError("boom"))