diff --git a/code-interpreter/app/app_configs.py b/code-interpreter/app/app_configs.py index ce451eb..7b20615 100644 --- a/code-interpreter/app/app_configs.py +++ b/code-interpreter/app/app_configs.py @@ -39,6 +39,14 @@ HOST = os.environ.get("HOST") or "0.0.0.0" # noqa: S104 PORT = int(os.environ.get("PORT") or "8000") +# Logging configuration +# LOG_LEVEL controls verbosity (e.g. DEBUG, INFO, WARNING). +# LOG_FORMAT selects the output style: "plain" (default human-readable text) or +# "json" (structured single-line JSON suitable for container log aggregators). +LOG_LEVEL = (os.environ.get("LOG_LEVEL") or "INFO").upper() +LOG_FORMAT = (os.environ.get("LOG_FORMAT") or "plain").lower() +JSON_LOGGING = LOG_FORMAT == "json" + # File storage configuration FILE_STORAGE_DIR = ( os.environ.get("FILE_STORAGE_DIR") or "/tmp/code-interpreter-files" # noqa: S108 diff --git a/code-interpreter/app/logging_config.py b/code-interpreter/app/logging_config.py new file mode 100644 index 0000000..3d1d1b9 --- /dev/null +++ b/code-interpreter/app/logging_config.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import logging +from typing import Final + +from app.app_configs import JSON_LOGGING, LOG_LEVEL + +PLAIN_FORMAT: Final[str] = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + +# Uvicorn installs its own handlers on these loggers. We clear them so records +# propagate to the root logger and are emitted through our single handler/format. +_UVICORN_LOGGERS: Final[tuple[str, ...]] = ("uvicorn", "uvicorn.error", "uvicorn.access") + + +class _DropColorMessageFilter(logging.Filter): + """Drop uvicorn's ``color_message`` record attribute. + + Uvicorn attaches an ANSI-colored duplicate of the message as + ``record.color_message``. The JSON formatter would otherwise emit it as a + field carrying raw escape codes, polluting the structured output. + """ + + def filter(self, record: logging.LogRecord) -> bool: + if hasattr(record, "color_message"): + del record.color_message + return True + + +def get_json_formatter() -> logging.Formatter: + """Return a structured single-line JSON formatter. + + Standard record attributes are emitted as discrete top-level fields and any + ``extra`` keys passed to a logging call are merged in alongside them, which + makes the output suitable for container log aggregators. + + The ``pythonjsonlogger`` import is deferred to this call site (only reached + when ``LOG_FORMAT=json``) so importing this module never hard-fails in + environments where the optional dependency is absent. + """ + from pythonjsonlogger.json import JsonFormatter + + return JsonFormatter( + "%(asctime)s %(levelname)s %(name)s %(filename)s %(lineno)d %(message)s", + rename_fields={ + "asctime": "timestamp", + "levelname": "level", + "name": "logger", + }, + datefmt="%Y-%m-%dT%H:%M:%S%z", + ) + + +def get_formatter() -> logging.Formatter: + """Return the configured formatter (JSON when ``LOG_FORMAT=json``).""" + if JSON_LOGGING: + return get_json_formatter() + return logging.Formatter(PLAIN_FORMAT) + + +def _resolve_level() -> tuple[int, bool]: + """Resolve LOG_LEVEL to a numeric level, falling back to INFO. + + Returns ``(level, was_valid)``. ``setLevel`` would raise ``ValueError`` on a + typo'd, operator-supplied level (e.g. ``INFOO``); we fall back to INFO and + let the caller warn, rather than crash the service at startup. + """ + level = logging.getLevelNamesMapping().get(LOG_LEVEL) + if level is None: + return logging.INFO, False + return level, True + + +def setup_logging() -> None: + """Configure root and uvicorn logging from the environment settings. + + Idempotent: the root logger's handlers are replaced (not appended) so that + repeated calls — e.g. module import plus an explicit startup call — do not + stack duplicate handlers. + + Note: this consolidates *all* logging (including uvicorn's access/error + logs) onto a single handler and format. In the default ``plain`` mode that + replaces uvicorn's own colorized access-log format with ``PLAIN_FORMAT``. + """ + formatter = get_formatter() + + handler = logging.StreamHandler() + handler.setFormatter(formatter) + handler.addFilter(_DropColorMessageFilter()) + + level, level_valid = _resolve_level() + + root = logging.getLogger() + root.handlers = [handler] + root.setLevel(level) + + # Route uvicorn's loggers through the root handler to keep one consistent + # format, and let them propagate rather than emitting via their own handlers. + for name in _UVICORN_LOGGERS: + uvicorn_logger = logging.getLogger(name) + uvicorn_logger.handlers = [] + uvicorn_logger.setLevel(level) + uvicorn_logger.propagate = True + + if not level_valid: + logging.getLogger(__name__).warning("Unknown LOG_LEVEL %r; falling back to INFO", LOG_LEVEL) diff --git a/code-interpreter/app/main.py b/code-interpreter/app/main.py index ece8700..028e225 100644 --- a/code-interpreter/app/main.py +++ b/code-interpreter/app/main.py @@ -14,16 +14,14 @@ from app.api.routes import router as api_router from app.app_configs import EXECUTOR_BACKEND, HOST, PORT, PYTHON_EXECUTOR_DOCKER_IMAGE from app.image_ref import normalize_image_ref +from app.logging_config import setup_logging from app.models.schemas import HealthResponse from app.services.executor_factory import get_executor SESSION_REAPER_INTERVAL_SEC = 30 # Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", -) +setup_logging() logger = logging.getLogger(__name__) @@ -159,4 +157,6 @@ def run() -> None: """ import uvicorn - uvicorn.run("app.main:app", host=HOST, port=PORT, log_level="info") + # log_config=None keeps the logging configured by setup_logging(); otherwise + # uvicorn would install its own handlers/formatters and bypass our format. + uvicorn.run("app.main:app", host=HOST, port=PORT, log_config=None) diff --git a/code-interpreter/pyproject.toml b/code-interpreter/pyproject.toml index 56520ac..4c11e44 100644 --- a/code-interpreter/pyproject.toml +++ b/code-interpreter/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "python-multipart>=0.0.20", "uvicorn==0.30.6", "kubernetes>=31.0.0", + "python-json-logger>=3.1.0", ] [project.scripts] diff --git a/code-interpreter/tests/integration_tests/test_logging_config.py b/code-interpreter/tests/integration_tests/test_logging_config.py new file mode 100644 index 0000000..03c372f --- /dev/null +++ b/code-interpreter/tests/integration_tests/test_logging_config.py @@ -0,0 +1,158 @@ +from __future__ import annotations + +import importlib +import json +import logging +import os +from collections.abc import Iterator +from types import ModuleType + +import pytest + +_LOG_ENV_VARS = ("LOG_FORMAT", "LOG_LEVEL") + + +@pytest.fixture(autouse=True) +def _restore_logging() -> Iterator[None]: + """Snapshot and restore global state mutated by these tests. + + Each test reloads app.app_configs / app.logging_config under custom env and + calls setup_logging(), which mutates both module-level globals and the + process-wide logging configuration. We restore the logging handlers, the + LOG_* env vars, and — critically — reload both modules back to their + baseline so leftover module attributes (e.g. JSON_LOGGING=True) cannot leak + into the rest of the pytest session, which shares this process. + """ + root = logging.getLogger() + saved_root_handlers = root.handlers[:] + saved_root_level = root.level + saved: dict[str, tuple[list[logging.Handler], int, bool]] = {} + for name in ("uvicorn", "uvicorn.error", "uvicorn.access"): + lg = logging.getLogger(name) + saved[name] = (lg.handlers[:], lg.level, lg.propagate) + saved_env = {var: os.environ.get(var) for var in _LOG_ENV_VARS} + try: + yield + finally: + root.handlers = saved_root_handlers + root.setLevel(saved_root_level) + for name, (handlers, level, propagate) in saved.items(): + lg = logging.getLogger(name) + lg.handlers = handlers + lg.setLevel(level) + lg.propagate = propagate + # Restore the env first, then reload so module globals reflect baseline. + for var, value in saved_env.items(): + if value is None: + os.environ.pop(var, None) + else: + os.environ[var] = value + import app.app_configs as app_configs + import app.logging_config as logging_config + + importlib.reload(app_configs) + importlib.reload(logging_config) + + +def _reload_logging_config( + monkeypatch: pytest.MonkeyPatch, log_format: str, level: str = "INFO" +) -> ModuleType: + """Reload app.app_configs and app.logging_config under the given env.""" + monkeypatch.setenv("LOG_FORMAT", log_format) + monkeypatch.setenv("LOG_LEVEL", level) + import app.app_configs as app_configs + import app.logging_config as logging_config + + importlib.reload(app_configs) + return importlib.reload(logging_config) + + +def test_plain_format_is_default(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("LOG_FORMAT", raising=False) + import app.app_configs as app_configs + + importlib.reload(app_configs) + assert app_configs.JSON_LOGGING is False + + +def test_plain_logging_output( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + logging_config = _reload_logging_config(monkeypatch, "plain") + logging_config.setup_logging() + + logging.getLogger("app.test").info("hello plain") + + err = capsys.readouterr().err + assert "hello plain" in err + assert "app.test" in err + # Plain output is not JSON. + with pytest.raises(json.JSONDecodeError): + json.loads(err.strip().splitlines()[-1]) + + +def test_json_logging_output_and_extra_fields( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + logging_config = _reload_logging_config(monkeypatch, "json") + logging_config.setup_logging() + + logging.getLogger("app.test").info("hello json", extra={"session_id": "abc123"}) + + line = capsys.readouterr().err.strip().splitlines()[-1] + payload = json.loads(line) + + assert payload["message"] == "hello json" + assert payload["level"] == "INFO" + assert payload["logger"] == "app.test" + # extra fields are promoted to discrete top-level keys. + assert payload["session_id"] == "abc123" + # renamed standard fields are present. + assert "timestamp" in payload + + +def test_json_logging_strips_color_message( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + """Uvicorn's ANSI-coded color_message must not leak into JSON output.""" + logging_config = _reload_logging_config(monkeypatch, "json") + logging_config.setup_logging() + + logging.getLogger("uvicorn.error").info( + "Started server", extra={"color_message": "\x1b[36mStarted server\x1b[0m"} + ) + + line = capsys.readouterr().err.strip().splitlines()[-1] + payload = json.loads(line) + + assert payload["message"] == "Started server" + assert "color_message" not in payload + + +def test_invalid_log_level_falls_back_to_info(monkeypatch: pytest.MonkeyPatch) -> None: + """A typo'd LOG_LEVEL must not crash startup; it falls back to INFO.""" + logging_config = _reload_logging_config(monkeypatch, "plain", level="INFOO") + logging_config.setup_logging() # must not raise + + assert logging.getLogger().level == logging.INFO + + +def test_setup_logging_is_idempotent(monkeypatch: pytest.MonkeyPatch) -> None: + """Repeated calls must not stack duplicate handlers on the root logger.""" + logging_config = _reload_logging_config(monkeypatch, "plain") + logging_config.setup_logging() + logging_config.setup_logging() + logging_config.setup_logging() + + assert len(logging.getLogger().handlers) == 1 + + +def test_uvicorn_loggers_propagate_to_root(monkeypatch: pytest.MonkeyPatch) -> None: + """Uvicorn loggers should propagate to root and own no handlers of their own.""" + logging_config = _reload_logging_config(monkeypatch, "json") + logging_config.setup_logging() + + for name in ("uvicorn", "uvicorn.error", "uvicorn.access"): + lg = logging.getLogger(name) + assert lg.handlers == [] + assert lg.propagate is True diff --git a/code-interpreter/uv.lock b/code-interpreter/uv.lock index 327ddb2..dbf123d 100644 --- a/code-interpreter/uv.lock +++ b/code-interpreter/uv.lock @@ -104,6 +104,7 @@ dependencies = [ { name = "fastapi" }, { name = "httpx" }, { name = "kubernetes" }, + { name = "python-json-logger" }, { name = "python-multipart" }, { name = "uvicorn" }, ] @@ -123,6 +124,7 @@ requires-dist = [ { name = "fastapi", specifier = "==0.115.0" }, { name = "httpx", specifier = ">=0.28.1" }, { name = "kubernetes", specifier = ">=31.0.0" }, + { name = "python-json-logger", specifier = ">=3.1.0" }, { name = "python-multipart", specifier = ">=0.0.20" }, { name = "uvicorn", specifier = "==0.30.6" }, ] @@ -522,6 +524,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, ] +[[package]] +name = "python-json-logger" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/ff/3cc9165fd44106973cd7ac9facb674a65ed853494592541d339bdc9a30eb/python_json_logger-4.1.0.tar.gz", hash = "sha256:b396b9e3ed782b09ff9d6e4f1683d46c83ad0d35d2e407c09a9ebbf038f88195", size = 17573, upload-time = "2026-03-29T04:39:56.805Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/be/0631a861af4d1c875f096c07d34e9a63639560a717130e7a87cbc82b7e3f/python_json_logger-4.1.0-py3-none-any.whl", hash = "sha256:132994765cf75bf44554be9aa49b06ef2345d23661a96720262716438141b6b2", size = 15021, upload-time = "2026-03-29T04:39:55.266Z" }, +] + [[package]] name = "python-multipart" version = "0.0.20"