From 2fae30a4d71fe2c1dde57a3fda4479484ae1db7c Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Fri, 19 Jun 2026 11:18:55 +0200 Subject: [PATCH 01/12] feat: Add optional OpenTelemetry traces to pylon_service Traces are disabled by default and enabled by setting PYLON_OTEL_TRACES_ENDPOINT to an OTLP collector base URL (analogous to PYLON_SENTRY_DSN). When enabled, the new init_otel() sets up a TracerProvider exporting via OTLP HTTP/protobuf and auto-instruments the Litestar HTTP server, httpx, aiohttp (web3/EVM), SQLAlchemy and asyncio. The active span's trace_id/span_id are injected into structured logs for log<->trace correlation; request_id is kept since trace_id only exists when tracing is enabled. turbobt's default websocket transport to the chain is not auto-traced (no instrumentor exists); documented in the service README. --- pylon_service/README.md | 24 ++ .../pylon_service/envs/test_env.template | 6 + pylon_service/pylon_service/logging.py | 29 ++- pylon_service/pylon_service/main.py | 12 +- pylon_service/pylon_service/otel_config.py | 35 +++ pylon_service/pylon_service/settings.py | 8 + .../pylon_service/uvicorn_entrypoint.py | 3 + pylon_service/pyproject.toml | 7 + pylon_service/tests/test_otel_config.py | 79 ++++++ .../tests/unit/test_request_id_context.py | 17 ++ pylon_service/uv.lock | 234 ++++++++++++++++++ 11 files changed, 449 insertions(+), 5 deletions(-) create mode 100644 pylon_service/pylon_service/otel_config.py create mode 100644 pylon_service/tests/test_otel_config.py diff --git a/pylon_service/README.md b/pylon_service/README.md index f5025de3..486bcb48 100644 --- a/pylon_service/README.md +++ b/pylon_service/README.md @@ -466,6 +466,30 @@ That means: This keeps one execution path for public behavior. +## Observability + +The service ships optional, disabled-by-default integrations for error tracking and tracing. Both are enabled purely +by setting an endpoint/DSN — no separate feature flag. + +### Sentry + +- Enabled iff `PYLON_SENTRY_DSN` is set. Reports errors through the Litestar and asyncio integrations. + +### OpenTelemetry traces + +- Enabled iff `PYLON_OTEL_TRACES_ENDPOINT` is set to the base URL of an OTLP collector (e.g. `http://alloy:4318`). + Traces are exported via OTLP HTTP/protobuf to `/v1/traces`. +- When enabled, auto-instrumentation covers: the Litestar HTTP server (incoming requests), `httpx` and `aiohttp` + (outgoing HTTP, including web3/EVM RPC), `SQLAlchemy` (database), and `asyncio`. The active span's `trace_id` / + `span_id` are injected into structured logs for log↔trace correlation. +- **Not traced:** the default Bittensor chain transport in `turbobt` is websockets, for which no OpenTelemetry + instrumentation exists — so chain RPC calls are not auto-traced. They are covered only when + `PYLON_BITTENSOR_NETWORK` points at an `http(s)://` URI, where `turbobt` falls back to `httpx`. +- The service does not ship a collector. Running and configuring Alloy (or any OTLP collector) at the configured + endpoint, including any tail-sampling or endpoint filtering, is the deployer's responsibility. +- Traces require the service to run as a single uvicorn process; `--workers` is rejected (see + `uvicorn_entrypoint.py`) because the SDK is initialised once at import and would not survive `fork()`. + ## Change checklist Before merging changes in `pylon_service`, verify: diff --git a/pylon_service/pylon_service/envs/test_env.template b/pylon_service/pylon_service/envs/test_env.template index 9cba1a29..d94fabb0 100644 --- a/pylon_service/pylon_service/envs/test_env.template +++ b/pylon_service/pylon_service/envs/test_env.template @@ -38,6 +38,12 @@ PYLON_OTEL_SERVICE_VERSION= # To override, uncomment the line below and set a non-empty value (an empty value would disable the fallback). # PYLON_OTEL_DEPLOYMENT_ENVIRONMENT= +# OTLP traces exporter endpoint - base URL of the collector, e.g. http://alloy:4318. +# Leave empty to disable OpenTelemetry traces (disabled by default). When set, traces are exported via +# OTLP HTTP/protobuf to /v1/traces, and the Litestar HTTP server plus the httpx, aiohttp, +# asyncio and SQLAlchemy libraries are auto-instrumented. You must run your own OTLP collector at this address. +PYLON_OTEL_TRACES_ENDPOINT= + # Sentry Configuration # DSN address for pylon to use - leave empty to disable sentry integration. PYLON_SENTRY_DSN= diff --git a/pylon_service/pylon_service/logging.py b/pylon_service/pylon_service/logging.py index 8c30f39c..40948d2d 100644 --- a/pylon_service/pylon_service/logging.py +++ b/pylon_service/pylon_service/logging.py @@ -52,6 +52,21 @@ def add_request_id_to_structlog( return event_dict +def add_otel_context_to_structlog( + logger: WrappedLogger, + method_name: str, + event_dict: EventDict, +) -> EventDict: + """Structlog processor injecting the active span's trace_id and span_id into the log event.""" + from opentelemetry import trace + + ctx = trace.get_current_span().get_span_context() + if ctx.is_valid: + event_dict["trace_id"] = format(ctx.trace_id, "032x") + event_dict["span_id"] = format(ctx.span_id, "016x") + return event_dict + + def add_coro_name_to_structlog( logger: WrappedLogger, method_name: str, @@ -75,6 +90,9 @@ def add_coro_name_to_structlog( # coro_name and OTEL attributes are layered on top per formatter: the raw formatter omits coro_name to stay # recursion-safe (see _get_current_coroutine_name), and OTEL resource attributes are only injected into the # json (production) output to keep dev console clean. +# The OTEL trace context (trace_id/span_id) is injected into every formatter, including the dev console, +# for log<->trace correlation; it reads only OTEL contextvars (no env lookup, no logging) so it stays +# recursion-safe, and adds nothing when tracing is disabled (the active span context is then invalid). _RAW_FOREIGN_PRE_CHAIN = ( structlog.processors.TimeStamper(fmt="iso"), structlog.stdlib.add_logger_name, @@ -85,9 +103,14 @@ def add_coro_name_to_structlog( # raw json output (production) carries OTEL resource attributes for parity with the json formatter, but # still omits coro_name to stay recursion-safe. -_RAW_JSON_FOREIGN_PRE_CHAIN = (*_RAW_FOREIGN_PRE_CHAIN, add_otel_resource_to_structlog) -_CONSOLE_FOREIGN_PRE_CHAIN = (*_RAW_FOREIGN_PRE_CHAIN, add_coro_name_to_structlog) -_JSON_FOREIGN_PRE_CHAIN = (*_RAW_FOREIGN_PRE_CHAIN, add_coro_name_to_structlog, add_otel_resource_to_structlog) +_RAW_JSON_FOREIGN_PRE_CHAIN = (*_RAW_FOREIGN_PRE_CHAIN, add_otel_resource_to_structlog, add_otel_context_to_structlog) +_CONSOLE_FOREIGN_PRE_CHAIN = (*_RAW_FOREIGN_PRE_CHAIN, add_coro_name_to_structlog, add_otel_context_to_structlog) +_JSON_FOREIGN_PRE_CHAIN = ( + *_RAW_FOREIGN_PRE_CHAIN, + add_coro_name_to_structlog, + add_otel_resource_to_structlog, + add_otel_context_to_structlog, +) _JSON_RENDER_PROCESSORS = [ structlog.stdlib.ProcessorFormatter.remove_processors_meta, diff --git a/pylon_service/pylon_service/main.py b/pylon_service/pylon_service/main.py index 96ead920..881477b0 100644 --- a/pylon_service/pylon_service/main.py +++ b/pylon_service/pylon_service/main.py @@ -1,6 +1,8 @@ from litestar import Litestar +from litestar.contrib.opentelemetry import OpenTelemetryConfig, OpenTelemetryPlugin from litestar.di import Provide from litestar.openapi.config import OpenAPIConfig +from litestar.plugins import PluginProtocol from litestar.plugins.prometheus import PrometheusConfig from pylon_service import dependencies, lifecycle @@ -11,10 +13,11 @@ from pylon_service.logging import litestar_logging_config from pylon_service.middleware.request_id import RequestIdMiddleware from pylon_service.middleware.request_timeout import RequestTimeoutMiddleware +from pylon_service.otel_config import init_otel from pylon_service.prometheus_controller import AuthenticatedPrometheusController from pylon_service.schema import PylonSchemaPlugin from pylon_service.sentry_config import init_sentry -from pylon_service.settings import response_cache_config, settings +from pylon_service.settings import otel_settings, response_cache_config, settings from pylon_service.stores import stores @@ -27,6 +30,10 @@ def create_app() -> Litestar: group_path=True, # Group metrics by path template to avoid cardinality explosion ) + plugins: list[PluginProtocol] = [PylonSchemaPlugin()] + if otel_settings.traces_enabled: + plugins.append(OpenTelemetryPlugin(OpenTelemetryConfig())) + return Litestar( route_handlers=[ v1_router, @@ -49,7 +56,7 @@ def create_app() -> Litestar: lifecycle.reschedule_weight_tasks_on_startup, ], dependencies={"bt_contact_pool": Provide(dependencies.bt_contact_pool_dep, use_cache=True)}, - plugins=[PylonSchemaPlugin()], + plugins=plugins, exception_handlers={ArchiveFallbackException: archive_fallback_handler}, stores=stores, response_cache_config=response_cache_config, @@ -58,5 +65,6 @@ def create_app() -> Litestar: ) +init_otel() init_sentry() app = create_app() diff --git a/pylon_service/pylon_service/otel_config.py b/pylon_service/pylon_service/otel_config.py new file mode 100644 index 00000000..8dfd77aa --- /dev/null +++ b/pylon_service/pylon_service/otel_config.py @@ -0,0 +1,35 @@ +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor +from opentelemetry.instrumentation.asyncio import AsyncioInstrumentor +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + +from pylon_service.db.database import engine as db_engine +from pylon_service.settings import otel_settings + + +def init_otel() -> None: + """ + Initialize OpenTelemetry tracing if a traces endpoint is configured. + + Sets up a TracerProvider exporting via OTLP HTTP/protobuf and auto-instruments the + httpx, aiohttp, asyncio, and SQLAlchemy libraries. A no-op when no endpoint is set. + """ + if not otel_settings.traces_enabled: + return + + resource = Resource.create(otel_settings.resource_attributes()) + provider = TracerProvider(resource=resource) + provider.add_span_processor( + BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{otel_settings.traces_endpoint}/v1/traces")) + ) + trace.set_tracer_provider(provider) + + HTTPXClientInstrumentor().instrument() + AioHttpClientInstrumentor().instrument() + AsyncioInstrumentor().instrument() + SQLAlchemyInstrumentor().instrument(engine=db_engine.sync_engine) diff --git a/pylon_service/pylon_service/settings.py b/pylon_service/pylon_service/settings.py index d072f4ec..5e8f7eaf 100644 --- a/pylon_service/pylon_service/settings.py +++ b/pylon_service/pylon_service/settings.py @@ -82,6 +82,7 @@ class OtelSettings(BaseSettings): deployment_environment: str = Field(default_factory=lambda: settings.environment) service_instance_id: str = _DEFAULT_SERVICE_INSTANCE_ID service_version: str = "" + traces_endpoint: str = "" model_config = SettingsConfigDict( env_file=ENV_FILE, @@ -90,6 +91,13 @@ class OtelSettings(BaseSettings): extra="ignore", ) + @property + def traces_enabled(self) -> bool: + """ + Return whether traces export is enabled (a non-empty endpoint is configured). + """ + return bool(self.traces_endpoint) + def resource_attributes(self) -> dict[str, str]: """Return OTEL resource attributes as dotted-key fields for log injection.""" attrs = { diff --git a/pylon_service/pylon_service/uvicorn_entrypoint.py b/pylon_service/pylon_service/uvicorn_entrypoint.py index beaf1d09..ff7fdd4c 100644 --- a/pylon_service/pylon_service/uvicorn_entrypoint.py +++ b/pylon_service/pylon_service/uvicorn_entrypoint.py @@ -14,6 +14,9 @@ def main() -> None: # - ensuring that only one worker performs tasks rescheduling in its startup # and the other wait for it to finish # - prometheus instrumentation + # - the OpenTelemetry SDK (TracerProvider + BatchSpanProcessor's background exporter + # thread) is initialised once at module import and would not survive fork(), so multiple + # workers would lose spans and risk deadlocks; it would need per-worker post-fork setup if any(arg == "--workers" or arg.startswith("--workers=") for arg in sys.argv[1:]): raise RuntimeError("Passing --workers is not supported for pylon-service.") diff --git a/pylon_service/pyproject.toml b/pylon_service/pyproject.toml index effd1770..dbfc3f56 100644 --- a/pylon_service/pyproject.toml +++ b/pylon_service/pyproject.toml @@ -18,6 +18,13 @@ dependencies = [ "aiosqlite>=0.22.1", "web3>=7.16.0", "structlog>=25,<26", + "opentelemetry-sdk~=1.41", + "opentelemetry-exporter-otlp-proto-http~=1.41", + "opentelemetry-instrumentation-asgi~=0.63b1", + "opentelemetry-instrumentation-httpx~=0.63b1", + "opentelemetry-instrumentation-aiohttp-client~=0.63b1", + "opentelemetry-instrumentation-sqlalchemy~=0.63b1", + "opentelemetry-instrumentation-asyncio~=0.63b1", ] [dependency-groups] diff --git a/pylon_service/tests/test_otel_config.py b/pylon_service/tests/test_otel_config.py new file mode 100644 index 00000000..24762370 --- /dev/null +++ b/pylon_service/tests/test_otel_config.py @@ -0,0 +1,79 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from pylon_service.otel_config import init_otel +from pylon_service.settings import OtelSettings + + +@pytest.mark.parametrize( + ("endpoint", "expected"), + [ + pytest.param("", False, id="empty_endpoint_disabled"), + pytest.param("http://alloy:4318", True, id="set_endpoint_enabled"), + ], +) +def test_traces_enabled_reflects_endpoint(endpoint, expected): + assert OtelSettings(traces_endpoint=endpoint).traces_enabled is expected + + +def test_init_otel_is_noop_when_disabled(): + with ( + patch("pylon_service.otel_config.otel_settings", OtelSettings(traces_endpoint="")), + patch("pylon_service.otel_config.trace.set_tracer_provider") as set_provider, + patch("pylon_service.otel_config.HTTPXClientInstrumentor") as httpx_instrumentor, + patch("pylon_service.otel_config.AioHttpClientInstrumentor") as aiohttp_instrumentor, + patch("pylon_service.otel_config.AsyncioInstrumentor") as asyncio_instrumentor, + patch("pylon_service.otel_config.SQLAlchemyInstrumentor") as sqlalchemy_instrumentor, + ): + init_otel() + + set_provider.assert_not_called() + httpx_instrumentor.assert_not_called() + aiohttp_instrumentor.assert_not_called() + asyncio_instrumentor.assert_not_called() + sqlalchemy_instrumentor.assert_not_called() + + +def test_init_otel_installs_provider_and_instruments_when_enabled(): + sync_engine = MagicMock() + db_engine = MagicMock(sync_engine=sync_engine) + + with ( + patch("pylon_service.otel_config.otel_settings", OtelSettings(traces_endpoint="http://alloy:4318")), + patch("pylon_service.otel_config.db_engine", db_engine), + patch("pylon_service.otel_config.trace.set_tracer_provider") as set_provider, + patch("pylon_service.otel_config.OTLPSpanExporter") as exporter, + patch("pylon_service.otel_config.HTTPXClientInstrumentor") as httpx_instrumentor, + patch("pylon_service.otel_config.AioHttpClientInstrumentor") as aiohttp_instrumentor, + patch("pylon_service.otel_config.AsyncioInstrumentor") as asyncio_instrumentor, + patch("pylon_service.otel_config.SQLAlchemyInstrumentor") as sqlalchemy_instrumentor, + ): + init_otel() + + set_provider.assert_called_once() + exporter.assert_called_once_with(endpoint="http://alloy:4318/v1/traces") + httpx_instrumentor.return_value.instrument.assert_called_once_with() + aiohttp_instrumentor.return_value.instrument.assert_called_once_with() + asyncio_instrumentor.return_value.instrument.assert_called_once_with() + sqlalchemy_instrumentor.return_value.instrument.assert_called_once_with(engine=sync_engine) + + +@pytest.mark.parametrize( + ("traces_enabled", "expected_present"), + [ + pytest.param(True, True, id="enabled_plugin_present"), + pytest.param(False, False, id="disabled_plugin_absent"), + ], +) +def test_create_app_registers_otel_plugin_only_when_enabled(traces_enabled, expected_present): + from litestar.contrib.opentelemetry import OpenTelemetryPlugin + + from pylon_service.main import create_app + + with patch("pylon_service.main.otel_settings") as otel_settings_mock: + otel_settings_mock.traces_enabled = traces_enabled + app = create_app() + + has_plugin = any(isinstance(plugin, OpenTelemetryPlugin) for plugin in app.plugins) + assert has_plugin is expected_present diff --git a/pylon_service/tests/unit/test_request_id_context.py b/pylon_service/tests/unit/test_request_id_context.py index 1855a9c1..9700e809 100644 --- a/pylon_service/tests/unit/test_request_id_context.py +++ b/pylon_service/tests/unit/test_request_id_context.py @@ -3,10 +3,12 @@ import pytest from litestar.types import HTTPRequestEvent, Message +from opentelemetry.sdk.trace import TracerProvider from pylon_service.logging import ( _get_current_coroutine_name, add_coro_name_to_structlog, + add_otel_context_to_structlog, add_otel_resource_to_structlog, add_request_id_to_structlog, ) @@ -112,3 +114,18 @@ def test_otel_resource_attributes_do_not_override_event_fields(): "service.instance.id": otel_settings.service_instance_id, "service.name": "override", } + + +def test_otel_context_processor_adds_nothing_without_active_span(): + assert add_otel_context_to_structlog(None, "info", {"event": "hello"}) == {"event": "hello"} + + +def test_otel_context_processor_injects_trace_and_span_ids(): + tracer = TracerProvider().get_tracer("test") + with tracer.start_as_current_span("test-span") as span: + ctx = span.get_span_context() + assert add_otel_context_to_structlog(None, "info", {"event": "hello"}) == { + "event": "hello", + "trace_id": format(ctx.trace_id, "032x"), + "span_id": format(ctx.span_id, "016x"), + } diff --git a/pylon_service/uv.lock b/pylon_service/uv.lock index 786d29ac..9dd644a7 100644 --- a/pylon_service/uv.lock +++ b/pylon_service/uv.lock @@ -169,6 +169,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/74/f5/9373290775639cb67a2fce7f629a1c240dce9f12fe927bc32b2736e16dfc/argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce", size = 43846, upload-time = "2025-10-20T03:33:33.021Z" }, ] +[[package]] +name = "asgiref" +version = "3.11.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/63/40/f03da1264ae8f7cfdbf9146542e5e7e8100a4c66ab48e791df9a03d3f6c0/asgiref-3.11.1.tar.gz", hash = "sha256:5f184dc43b7e763efe848065441eac62229c9f7b0475f41f80e207a114eda4ce", size = 38550, upload-time = "2026-02-03T13:30:14.33Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/0a/a72d10ed65068e115044937873362e6e32fab1b7dce0046aeb224682c989/asgiref-3.11.1-py3-none-any.whl", hash = "sha256:e8667a091e69529631969fd45dc268fa79b99c92c5fcdda727757e52146ec133", size = 24345, upload-time = "2026-02-03T13:30:13.039Z" }, +] + [[package]] name = "attrs" version = "25.4.0" @@ -375,6 +384,13 @@ dependencies = [ { name = "bittensor-pylon-commons" }, { name = "bittensor-wallet" }, { name = "litestar", extra = ["prometheus", "standard"] }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-instrumentation-aiohttp-client" }, + { name = "opentelemetry-instrumentation-asgi" }, + { name = "opentelemetry-instrumentation-asyncio" }, + { name = "opentelemetry-instrumentation-httpx" }, + { name = "opentelemetry-instrumentation-sqlalchemy" }, + { name = "opentelemetry-sdk" }, { name = "sentry-sdk", extra = ["litestar"] }, { name = "sqlalchemy" }, { name = "structlog" }, @@ -416,6 +432,13 @@ requires-dist = [ { name = "bittensor-pylon-commons", editable = "../pylon_commons" }, { name = "bittensor-wallet" }, { name = "litestar", extras = ["prometheus", "standard"] }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = "~=1.41" }, + { name = "opentelemetry-instrumentation-aiohttp-client", specifier = "~=0.63b1" }, + { name = "opentelemetry-instrumentation-asgi", specifier = "~=0.63b1" }, + { name = "opentelemetry-instrumentation-asyncio", specifier = "~=0.63b1" }, + { name = "opentelemetry-instrumentation-httpx", specifier = "~=0.63b1" }, + { name = "opentelemetry-instrumentation-sqlalchemy", specifier = "~=0.63b1" }, + { name = "opentelemetry-sdk", specifier = "~=1.41" }, { name = "sentry-sdk", extras = ["litestar"] }, { name = "sqlalchemy", specifier = ">=2.0.49" }, { name = "structlog", specifier = ">=25,<26" }, @@ -1210,6 +1233,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, ] +[[package]] +name = "googleapis-common-protos" +version = "1.75.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/c8/f439cffde755cffa462bfbb156278fa6f9d09119719af9814b858fd4f81f/googleapis_common_protos-1.75.0.tar.gz", hash = "sha256:53a062ff3c32552fbd62c11fe23768b78e4ddf0494d5e5fd97d3f4689c75fbbd", size = 151035, upload-time = "2026-05-07T08:04:49.423Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c8/e2645aa8ed02fd4c7a2f59d68783b65b1f3cbdfe39a6308e156509d1fee8/googleapis_common_protos-1.75.0-py3-none-any.whl", hash = "sha256:961ed60399c457ceb0ee8f285a84c870aabc9c6a832b9d37bb281b5bebde43ed", size = 300631, upload-time = "2026-05-07T08:03:30.345Z" }, +] + [[package]] name = "greenlet" version = "3.5.0" @@ -1658,6 +1693,190 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/34/434c594e0125a16b05a7bedaea33e63c90abbfbe47e5729a735a8a8a90ea/nox-2025.11.12-py3-none-any.whl", hash = "sha256:707171f9f63bc685da9d00edd8c2ceec8405b8e38b5fb4e46114a860070ef0ff", size = 74447, upload-time = "2025-11-12T18:39:01.575Z" }, ] +[[package]] +name = "opentelemetry-api" +version = "1.42.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/1c/125e1c936c0873796771b7f04f6c93b9f1bf5d424cea90fda94a99f61da8/opentelemetry_api-1.42.1.tar.gz", hash = "sha256:56c63bea9f77b62856be8c47600474acad853b2924b99b1687c4cb6297166716", size = 72296, upload-time = "2026-05-21T16:32:49.335Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/ca/9520cc1f3dfbbd03ac5903bbf55833e257bc64b1cf30fa8b0d6df374d821/opentelemetry_api-1.42.1-py3-none-any.whl", hash = "sha256:51a69edacadbc03a8950ace1c4c21099cacc538820ac2c9e36277e78cebba714", size = 61311, upload-time = "2026-05-21T16:32:28.822Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.42.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/9c/216acfeaedadf2e1937f4373929b20f73197c5c4a2546d4f584b7fa63813/opentelemetry_exporter_otlp_proto_common-1.42.1.tar.gz", hash = "sha256:04f1f01fb597c4249dfcd7f8b861c902c2102369d376d9d346ff38de4469a2ee", size = 21433, upload-time = "2026-05-21T16:32:55.526Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/43/2375e7612e1121a4518c17603b6e0b03ad94f565aafad53f464dc5be2bf6/opentelemetry_exporter_otlp_proto_common-1.42.1-py3-none-any.whl", hash = "sha256:f48d395ab815b444da118868977e9798ea354c25737d5cf39578ae894011c140", size = 17327, upload-time = "2026-05-21T16:32:33.387Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.42.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/32/826bfa1d80ecea24f47808de03cd4a0d13c17ecc07712f45123f0f61e4ac/opentelemetry_exporter_otlp_proto_http-1.42.1.tar.gz", hash = "sha256:bf142a21035d7571ac3a09cb2e5639f49886f243972883cfe777ed3bf02b734d", size = 25406, upload-time = "2026-05-21T16:32:56.807Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/96/82cb223a1502f0787d4bbff12907f5f8d870a50731febcd5818d93ef9555/opentelemetry_exporter_otlp_proto_http-1.42.1-py3-none-any.whl", hash = "sha256:00a16da1b312a1d6c7233d600d557c91df71125af73020f3b9a7765bd699d59d", size = 21793, upload-time = "2026-05-21T16:32:35.277Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/6d/4de72d97ff54db1ed270c7a59c9b904b917c0ac7af429c086c388b824ddb/opentelemetry_instrumentation-0.63b1.tar.gz", hash = "sha256:32368d6ae52c8de20aa790a6ad86b10a76f09956092337ae37d675773990e541", size = 41081, upload-time = "2026-05-21T16:36:14.206Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/a1/9314e621c143e4d82a5bf7a43c2ff7a745d31023506336857607c8c543cc/opentelemetry_instrumentation-0.63b1-py3-none-any.whl", hash = "sha256:f1986716d52cc316ea5f60189098726a9071d8ecc0eee96c9ed110be08bade9c", size = 35577, upload-time = "2026-05-21T16:34:56.818Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-aiohttp-client" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/47/6f/e7105760ec528b465238a06a05f8e6c358063e00ad53fed76fd625c6230c/opentelemetry_instrumentation_aiohttp_client-0.63b1.tar.gz", hash = "sha256:ec97399c02a7e278359efffdf16e93d59a7103b16f66790cda9b9496b171b136", size = 19041, upload-time = "2026-05-21T16:36:15.62Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/f8/f18666128e4b602601316ee73f35986c0a42ce44a615fd6b0f566c15e282/opentelemetry_instrumentation_aiohttp_client-0.63b1-py3-none-any.whl", hash = "sha256:5259c2c5103a5919941e0c45f2c95b055a50eb2ab39dc252f4b1e41ce6d984bb", size = 13675, upload-time = "2026-05-21T16:34:59.263Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asgiref" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a0/b5/7ea3a9fd1b80e89786c14250bfaecf32a753c3fd08232690f4da8dc16e29/opentelemetry_instrumentation_asgi-0.63b1.tar.gz", hash = "sha256:267b422416d768f3c7f4054883b41d9c3a7c943d86d20032b738c99a3dbb5862", size = 26151, upload-time = "2026-05-21T16:36:18.368Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/7e/83986f27b421de04fab1e1a84e892621dac42e6432a9c66779505f4d1381/opentelemetry_instrumentation_asgi-0.63b1-py3-none-any.whl", hash = "sha256:1a22453dfa965f14799b10a674b8acbcb897a8a75c79136060af54214cc7886e", size = 15906, upload-time = "2026-05-21T16:35:04.162Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-asyncio" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/1a/206fcf577eee1a1e88b37a01d837e25fcad27af3514d4697b32a0f04c425/opentelemetry_instrumentation_asyncio-0.63b1.tar.gz", hash = "sha256:0ae623583dcbe0ae17d63c995906d02a64213652ff180875ace680d1e6c286ec", size = 13942, upload-time = "2026-05-21T16:36:20.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/25/c98e90c803b3f9bcf1c1e8b1284d022f2894d45f2d350f8c64f75209523c/opentelemetry_instrumentation_asyncio-0.63b1-py3-none-any.whl", hash = "sha256:0baa80c9314569fbb868e07f0b8136da367203c2942df5813c184bde2537f44a", size = 13098, upload-time = "2026-05-21T16:35:06.83Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-httpx" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/27/c2b4335bca030e893acbe5ff2b4f434868773bf94508be7e6bf5af981b24/opentelemetry_instrumentation_httpx-0.63b1.tar.gz", hash = "sha256:f41ec82f25c3abcdada621052db3e5fd648e3b43d55eec4b9c0c5d3ecb7b4ff4", size = 23557, upload-time = "2026-05-21T16:36:34.583Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/b8/f536780996195c3b9f2354998554671e05a7a262df8c043f63fe9e5a6f0b/opentelemetry_instrumentation_httpx-0.63b1-py3-none-any.whl", hash = "sha256:14df6e99d81be9a8cd238f6639b6fa52404c4d3ce219058fcb5dc8c0f2211f86", size = 16336, upload-time = "2026-05-21T16:35:32.221Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-sqlalchemy" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cb/97/e5cb3ad027aebf7128faadeefe4d4cb0fc07ed32ef95e8fc9d828a077a85/opentelemetry_instrumentation_sqlalchemy-0.63b1.tar.gz", hash = "sha256:621f9eb800ea24a98b4eda968373e3909bfede0ff47f77b96f8b8a18bc2a2a1a", size = 18006, upload-time = "2026-05-21T16:36:46.855Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/bc/c0984c4c51da64cc2c37ce031b4fb7fab61d223f2188a6bc6b5f18035ae3/opentelemetry_instrumentation_sqlalchemy-0.63b1-py3-none-any.whl", hash = "sha256:d417414f6517963e9c1ee91ec971b94938b46904499114d035a43937bd62b6a1", size = 14410, upload-time = "2026-05-21T16:35:53.342Z" }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.42.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/55/63eac3e1089b768ba014091fdd2ae8a9a440c821ef5e2b786909c94c8836/opentelemetry_proto-1.42.1.tar.gz", hash = "sha256:c6a51e6b4f05ae63565f3a113217f3d2bfaec68f78c02d7a6c85f9010d1cfca6", size = 45839, upload-time = "2026-05-21T16:33:03.937Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/9d/171c02c84a76940b7e601805b3bb536985aded9168fbcc9ba52f0a730fa2/opentelemetry_proto-1.42.1-py3-none-any.whl", hash = "sha256:dedb74cba2886c59c7789b227a7a670613025a07489040050aedff6e5c0fb43c", size = 71782, upload-time = "2026-05-21T16:32:44.867Z" }, +] + +[[package]] +name = "opentelemetry-sdk" +version = "1.42.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/f7/b390bd9bfd703bf98a68fea1f27786c6872331fd617164a54b8a59bdc008/opentelemetry_sdk-1.42.1.tar.gz", hash = "sha256:8c834e8f8c9ba4171d4ec843d0cb8a67e4c7394d3f9e9297e582cbd9456ddbf7", size = 239262, upload-time = "2026-05-21T16:33:04.641Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/6b/4287766cfbde577ae2272e8884abac325aeaac0d64f41c61d5b8cc595105/opentelemetry_sdk-1.42.1-py3-none-any.whl", hash = "sha256:083cd4bbfaa5aa7b5a9e552430d9951219967cfb27aa61feb13a77aba1fc839d", size = 170907, upload-time = "2026-05-21T16:32:45.894Z" }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/99/4d7dd6df64795951413ce6e815f8cf1eb191daf7196ae86574589643d5f3/opentelemetry_semantic_conventions-0.63b1.tar.gz", hash = "sha256:3daf963611334b365e98a57438183eb012d3bfb40b2d931a9af613476b8701a9", size = 148340, upload-time = "2026-05-21T16:33:05.455Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/7a/7fe66f5f3682b1dd47d88cc4e11f1c6c0966b737de2d16671146e23c39a5/opentelemetry_semantic_conventions-0.63b1-py3-none-any.whl", hash = "sha256:dfe5ef4dee82586b746f522b818ceb298d00b3d59f660042bd79404bff8d0682", size = 203713, upload-time = "2026-05-21T16:32:47.016Z" }, +] + +[[package]] +name = "opentelemetry-util-http" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/d8/7bf5e4cec0578ac3c28c18eb7b88f34279139cbc8c568d6aa02b9c5ae53e/opentelemetry_util_http-0.63b1.tar.gz", hash = "sha256:ba1268f00922ee522dba2ae38458060f99486e7385a8056985901ca9685adfff", size = 11102, upload-time = "2026-05-21T16:36:56.675Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/f1/34e047e8f6a3c67e5220acf1af7b9f62868c25d77791bca74457bd2180a6/opentelemetry_util_http-0.63b1-py3-none-any.whl", hash = "sha256:6284194028c59cd439f8acfe388145069a6127f11dc077e1344a2094adacc3f8", size = 8205, upload-time = "2026-05-21T16:36:09.736Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -1847,6 +2066,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] +[[package]] +name = "protobuf" +version = "6.33.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/70/e908e9c5e52ef7c3a6c7902c9dfbb34c7e29c25d2f81ade3856445fd5c94/protobuf-6.33.6.tar.gz", hash = "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", size = 444531, upload-time = "2026-03-18T19:05:00.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/9f/2f509339e89cfa6f6a4c4ff50438db9ca488dec341f7e454adad60150b00/protobuf-6.33.6-cp310-abi3-win32.whl", hash = "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", size = 425739, upload-time = "2026-03-18T19:04:48.373Z" }, + { url = "https://files.pythonhosted.org/packages/76/5d/683efcd4798e0030c1bab27374fd13a89f7c2515fb1f3123efdfaa5eab57/protobuf-6.33.6-cp310-abi3-win_amd64.whl", hash = "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", size = 437089, upload-time = "2026-03-18T19:04:50.381Z" }, + { url = "https://files.pythonhosted.org/packages/5c/01/a3c3ed5cd186f39e7880f8303cc51385a198a81469d53d0fdecf1f64d929/protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", size = 427737, upload-time = "2026-03-18T19:04:51.866Z" }, + { url = "https://files.pythonhosted.org/packages/ee/90/b3c01fdec7d2f627b3a6884243ba328c1217ed2d978def5c12dc50d328a3/protobuf-6.33.6-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", size = 324610, upload-time = "2026-03-18T19:04:53.096Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ca/25afc144934014700c52e05103c2421997482d561f3101ff352e1292fb81/protobuf-6.33.6-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", size = 339381, upload-time = "2026-03-18T19:04:54.616Z" }, + { url = "https://files.pythonhosted.org/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", size = 323436, upload-time = "2026-03-18T19:04:55.768Z" }, + { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" }, +] + [[package]] name = "pycparser" version = "3.0" From cab234da875c31d634e79ca4192ae108efd89f97 Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Fri, 19 Jun 2026 14:19:22 +0200 Subject: [PATCH 02/12] refactor: rename otel traces_endpoint setting to collector_endpoint --- pylon_service/README.md | 2 +- pylon_service/pylon_service/envs/test_env.template | 2 +- pylon_service/pylon_service/otel_config.py | 2 +- pylon_service/pylon_service/settings.py | 4 ++-- pylon_service/tests/test_otel_config.py | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pylon_service/README.md b/pylon_service/README.md index 486bcb48..a71c4f8f 100644 --- a/pylon_service/README.md +++ b/pylon_service/README.md @@ -477,7 +477,7 @@ by setting an endpoint/DSN — no separate feature flag. ### OpenTelemetry traces -- Enabled iff `PYLON_OTEL_TRACES_ENDPOINT` is set to the base URL of an OTLP collector (e.g. `http://alloy:4318`). +- Enabled iff `PYLON_OTEL_COLLECTOR_ENDPOINT` is set to the base URL of an OTLP collector (e.g. `http://alloy:4318`). Traces are exported via OTLP HTTP/protobuf to `/v1/traces`. - When enabled, auto-instrumentation covers: the Litestar HTTP server (incoming requests), `httpx` and `aiohttp` (outgoing HTTP, including web3/EVM RPC), `SQLAlchemy` (database), and `asyncio`. The active span's `trace_id` / diff --git a/pylon_service/pylon_service/envs/test_env.template b/pylon_service/pylon_service/envs/test_env.template index d94fabb0..c8f3bd04 100644 --- a/pylon_service/pylon_service/envs/test_env.template +++ b/pylon_service/pylon_service/envs/test_env.template @@ -42,7 +42,7 @@ PYLON_OTEL_SERVICE_VERSION= # Leave empty to disable OpenTelemetry traces (disabled by default). When set, traces are exported via # OTLP HTTP/protobuf to /v1/traces, and the Litestar HTTP server plus the httpx, aiohttp, # asyncio and SQLAlchemy libraries are auto-instrumented. You must run your own OTLP collector at this address. -PYLON_OTEL_TRACES_ENDPOINT= +PYLON_OTEL_COLLECTOR_ENDPOINT= # Sentry Configuration # DSN address for pylon to use - leave empty to disable sentry integration. diff --git a/pylon_service/pylon_service/otel_config.py b/pylon_service/pylon_service/otel_config.py index 8dfd77aa..58326ca3 100644 --- a/pylon_service/pylon_service/otel_config.py +++ b/pylon_service/pylon_service/otel_config.py @@ -25,7 +25,7 @@ def init_otel() -> None: resource = Resource.create(otel_settings.resource_attributes()) provider = TracerProvider(resource=resource) provider.add_span_processor( - BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{otel_settings.traces_endpoint}/v1/traces")) + BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{otel_settings.collector_endpoint}/v1/traces")) ) trace.set_tracer_provider(provider) diff --git a/pylon_service/pylon_service/settings.py b/pylon_service/pylon_service/settings.py index 5e8f7eaf..70b5ca6a 100644 --- a/pylon_service/pylon_service/settings.py +++ b/pylon_service/pylon_service/settings.py @@ -82,7 +82,7 @@ class OtelSettings(BaseSettings): deployment_environment: str = Field(default_factory=lambda: settings.environment) service_instance_id: str = _DEFAULT_SERVICE_INSTANCE_ID service_version: str = "" - traces_endpoint: str = "" + collector_endpoint: str = "" model_config = SettingsConfigDict( env_file=ENV_FILE, @@ -96,7 +96,7 @@ def traces_enabled(self) -> bool: """ Return whether traces export is enabled (a non-empty endpoint is configured). """ - return bool(self.traces_endpoint) + return bool(self.collector_endpoint) def resource_attributes(self) -> dict[str, str]: """Return OTEL resource attributes as dotted-key fields for log injection.""" diff --git a/pylon_service/tests/test_otel_config.py b/pylon_service/tests/test_otel_config.py index 24762370..67bc9edf 100644 --- a/pylon_service/tests/test_otel_config.py +++ b/pylon_service/tests/test_otel_config.py @@ -14,12 +14,12 @@ ], ) def test_traces_enabled_reflects_endpoint(endpoint, expected): - assert OtelSettings(traces_endpoint=endpoint).traces_enabled is expected + assert OtelSettings(collector_endpoint=endpoint).traces_enabled is expected def test_init_otel_is_noop_when_disabled(): with ( - patch("pylon_service.otel_config.otel_settings", OtelSettings(traces_endpoint="")), + patch("pylon_service.otel_config.otel_settings", OtelSettings(collector_endpoint="")), patch("pylon_service.otel_config.trace.set_tracer_provider") as set_provider, patch("pylon_service.otel_config.HTTPXClientInstrumentor") as httpx_instrumentor, patch("pylon_service.otel_config.AioHttpClientInstrumentor") as aiohttp_instrumentor, @@ -40,7 +40,7 @@ def test_init_otel_installs_provider_and_instruments_when_enabled(): db_engine = MagicMock(sync_engine=sync_engine) with ( - patch("pylon_service.otel_config.otel_settings", OtelSettings(traces_endpoint="http://alloy:4318")), + patch("pylon_service.otel_config.otel_settings", OtelSettings(collector_endpoint="http://alloy:4318")), patch("pylon_service.otel_config.db_engine", db_engine), patch("pylon_service.otel_config.trace.set_tracer_provider") as set_provider, patch("pylon_service.otel_config.OTLPSpanExporter") as exporter, From ae04a875d62350f66300f60c13acbefe4ee72477 Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Fri, 19 Jun 2026 15:24:12 +0200 Subject: [PATCH 03/12] Fixes --- pylon_commons/pylon_commons/settings.py | 3 + pylon_service/README.md | 14 +++-- .../pylon_service/envs/test_env.template | 8 ++- pylon_service/pylon_service/logging.py | 3 +- pylon_service/pylon_service/otel_config.py | 11 ++-- pylon_service/pylon_service/settings.py | 11 +++- .../pylon_service/uvicorn_entrypoint.py | 9 +++ pylon_service/pyproject.toml | 3 + pylon_service/tests/test_otel_config.py | 58 ++++++++++++++++- .../tests/test_uvicorn_entrypoint.py | 62 +++++++++++++++++++ 10 files changed, 168 insertions(+), 14 deletions(-) create mode 100644 pylon_service/tests/test_uvicorn_entrypoint.py diff --git a/pylon_commons/pylon_commons/settings.py b/pylon_commons/pylon_commons/settings.py index a46ee0ac..554fe8e5 100644 --- a/pylon_commons/pylon_commons/settings.py +++ b/pylon_commons/pylon_commons/settings.py @@ -54,6 +54,9 @@ class Settings(BaseSettings): max_request_timeout_seconds: float = 300.0 # evm + # WARNING: these URLs are recorded as-is in telemetry (OpenTelemetry spans, debug logs, and the + # Prometheus `rpc_url` metric label), so do NOT embed credentials in them (no `user:pass@` and no + # path/query API keys like `/v2/`). If a provider requires authentication, pass it out of band. evm_rpc_url: evm_types.RpcUrl = evm_types.RpcUrl("https://lite.chain.opentensor.ai") evm_archive_rpc_url: evm_types.RpcUrl = evm_types.RpcUrl("https://archive.chain.opentensor.ai") evm_archive_blocks_cutoff: ArchiveBlocksCutoff = ArchiveBlocksCutoff(300) diff --git a/pylon_service/README.md b/pylon_service/README.md index a71c4f8f..781cc420 100644 --- a/pylon_service/README.md +++ b/pylon_service/README.md @@ -480,15 +480,21 @@ by setting an endpoint/DSN — no separate feature flag. - Enabled iff `PYLON_OTEL_COLLECTOR_ENDPOINT` is set to the base URL of an OTLP collector (e.g. `http://alloy:4318`). Traces are exported via OTLP HTTP/protobuf to `/v1/traces`. - When enabled, auto-instrumentation covers: the Litestar HTTP server (incoming requests), `httpx` and `aiohttp` - (outgoing HTTP, including web3/EVM RPC), `SQLAlchemy` (database), and `asyncio`. The active span's `trace_id` / - `span_id` are injected into structured logs for log↔trace correlation. + (outgoing HTTP — chain RPC over `httpx`, web3/EVM RPC over `aiohttp`), and `SQLAlchemy` (database). The active span's + `trace_id` / `span_id` are injected into structured logs for log↔trace correlation. +- Outgoing HTTP URLs are recorded on spans verbatim, so do **not** embed credentials in the configured RPC URLs + (`PYLON_EVM_RPC_URL`, `PYLON_EVM_ARCHIVE_RPC_URL`, or an `http(s)://` `PYLON_BITTENSOR_NETWORK`) — see the warning in + `pylon_commons/settings.py`. The same URLs also appear in debug logs and the Prometheus `rpc_url` metric label. +- `asyncio` instrumentation is opt-in via `PYLON_OTEL_INSTRUMENT_ASYNCIO=true` (default off): it patches hot asyncio + primitives but emits no spans without further `OTEL_PYTHON_ASYNCIO_*` configuration, so it stays off by default. - **Not traced:** the default Bittensor chain transport in `turbobt` is websockets, for which no OpenTelemetry instrumentation exists — so chain RPC calls are not auto-traced. They are covered only when `PYLON_BITTENSOR_NETWORK` points at an `http(s)://` URI, where `turbobt` falls back to `httpx`. - The service does not ship a collector. Running and configuring Alloy (or any OTLP collector) at the configured endpoint, including any tail-sampling or endpoint filtering, is the deployer's responsibility. -- Traces require the service to run as a single uvicorn process; `--workers` is rejected (see - `uvicorn_entrypoint.py`) because the SDK is initialised once at import and would not survive `fork()`. +- Traces require the service to run as a single uvicorn process; both `--workers` and `WEB_CONCURRENCY` (other than + `1`) are rejected (see `uvicorn_entrypoint.py`) because the SDK is initialised once at import and would not survive + `fork()`. ## Change checklist diff --git a/pylon_service/pylon_service/envs/test_env.template b/pylon_service/pylon_service/envs/test_env.template index c8f3bd04..d90fe484 100644 --- a/pylon_service/pylon_service/envs/test_env.template +++ b/pylon_service/pylon_service/envs/test_env.template @@ -40,10 +40,14 @@ PYLON_OTEL_SERVICE_VERSION= # OTLP traces exporter endpoint - base URL of the collector, e.g. http://alloy:4318. # Leave empty to disable OpenTelemetry traces (disabled by default). When set, traces are exported via -# OTLP HTTP/protobuf to /v1/traces, and the Litestar HTTP server plus the httpx, aiohttp, -# asyncio and SQLAlchemy libraries are auto-instrumented. You must run your own OTLP collector at this address. +# OTLP HTTP/protobuf to /v1/traces, and the Litestar HTTP server plus the httpx, aiohttp +# and SQLAlchemy libraries are auto-instrumented. You must run your own OTLP collector at this address. PYLON_OTEL_COLLECTOR_ENDPOINT= +# Opt in to asyncio instrumentation (default false). It patches hot asyncio primitives but emits no spans +# without further OTEL_PYTHON_ASYNCIO_* configuration, so it is kept off by default. +# PYLON_OTEL_INSTRUMENT_ASYNCIO=false + # Sentry Configuration # DSN address for pylon to use - leave empty to disable sentry integration. PYLON_SENTRY_DSN= diff --git a/pylon_service/pylon_service/logging.py b/pylon_service/pylon_service/logging.py index 40948d2d..c3a47c50 100644 --- a/pylon_service/pylon_service/logging.py +++ b/pylon_service/pylon_service/logging.py @@ -6,6 +6,7 @@ import structlog from litestar.logging import LoggingConfig +from opentelemetry import trace from pylon_service.middleware.request_id import current_request_id from pylon_service.settings import otel_settings, settings @@ -58,8 +59,6 @@ def add_otel_context_to_structlog( event_dict: EventDict, ) -> EventDict: """Structlog processor injecting the active span's trace_id and span_id into the log event.""" - from opentelemetry import trace - ctx = trace.get_current_span().get_span_context() if ctx.is_valid: event_dict["trace_id"] = format(ctx.trace_id, "032x") diff --git a/pylon_service/pylon_service/otel_config.py b/pylon_service/pylon_service/otel_config.py index 58326ca3..b190a13f 100644 --- a/pylon_service/pylon_service/otel_config.py +++ b/pylon_service/pylon_service/otel_config.py @@ -16,8 +16,10 @@ def init_otel() -> None: """ Initialize OpenTelemetry tracing if a traces endpoint is configured. - Sets up a TracerProvider exporting via OTLP HTTP/protobuf and auto-instruments the - httpx, aiohttp, asyncio, and SQLAlchemy libraries. A no-op when no endpoint is set. + Sets up a TracerProvider exporting via OTLP HTTP/protobuf and auto-instruments the httpx, + aiohttp, and SQLAlchemy libraries. asyncio instrumentation is opt-in via + ``PYLON_OTEL_INSTRUMENT_ASYNCIO`` because it patches hot asyncio primitives yet emits no spans + without further configuration. A no-op when no endpoint is set. """ if not otel_settings.traces_enabled: return @@ -25,11 +27,12 @@ def init_otel() -> None: resource = Resource.create(otel_settings.resource_attributes()) provider = TracerProvider(resource=resource) provider.add_span_processor( - BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{otel_settings.collector_endpoint}/v1/traces")) + BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{otel_settings.normalized_collector_endpoint}/v1/traces")) ) trace.set_tracer_provider(provider) HTTPXClientInstrumentor().instrument() AioHttpClientInstrumentor().instrument() - AsyncioInstrumentor().instrument() SQLAlchemyInstrumentor().instrument(engine=db_engine.sync_engine) + if otel_settings.instrument_asyncio: + AsyncioInstrumentor().instrument() diff --git a/pylon_service/pylon_service/settings.py b/pylon_service/pylon_service/settings.py index 70b5ca6a..df4e4a4d 100644 --- a/pylon_service/pylon_service/settings.py +++ b/pylon_service/pylon_service/settings.py @@ -83,6 +83,7 @@ class OtelSettings(BaseSettings): service_instance_id: str = _DEFAULT_SERVICE_INSTANCE_ID service_version: str = "" collector_endpoint: str = "" + instrument_asyncio: bool = False model_config = SettingsConfigDict( env_file=ENV_FILE, @@ -91,12 +92,20 @@ class OtelSettings(BaseSettings): extra="ignore", ) + @property + def normalized_collector_endpoint(self) -> str: + """ + Return the collector endpoint with surrounding whitespace and trailing slashes removed, + so signal paths can be appended without producing a double slash. + """ + return self.collector_endpoint.strip().rstrip("/") + @property def traces_enabled(self) -> bool: """ Return whether traces export is enabled (a non-empty endpoint is configured). """ - return bool(self.collector_endpoint) + return bool(self.normalized_collector_endpoint) def resource_attributes(self) -> dict[str, str]: """Return OTEL resource attributes as dotted-key fields for log injection.""" diff --git a/pylon_service/pylon_service/uvicorn_entrypoint.py b/pylon_service/pylon_service/uvicorn_entrypoint.py index ff7fdd4c..b71d2fcd 100644 --- a/pylon_service/pylon_service/uvicorn_entrypoint.py +++ b/pylon_service/pylon_service/uvicorn_entrypoint.py @@ -20,6 +20,15 @@ def main() -> None: if any(arg == "--workers" or arg.startswith("--workers=") for arg in sys.argv[1:]): raise RuntimeError("Passing --workers is not supported for pylon-service.") + # uvicorn also reads the worker count from the WEB_CONCURRENCY environment variable (common in + # container setups), which would silently fork workers without --workers ever being passed. + web_concurrency = os.environ.get("WEB_CONCURRENCY") + if web_concurrency is not None and web_concurrency.strip() not in ("", "1"): + raise RuntimeError( + f"WEB_CONCURRENCY={web_concurrency!r} is not supported for pylon-service; " + "it must run as a single process (set WEB_CONCURRENCY=1 or leave it unset)." + ) + host = os.environ.get("PYLON_UVICORN_HOST", "0.0.0.0") port = int(os.environ.get("PYLON_UVICORN_PORT", "8000")) auto_reload = settings.debug diff --git a/pylon_service/pyproject.toml b/pylon_service/pyproject.toml index dbfc3f56..5373dc32 100644 --- a/pylon_service/pyproject.toml +++ b/pylon_service/pyproject.toml @@ -18,6 +18,9 @@ dependencies = [ "aiosqlite>=0.22.1", "web3>=7.16.0", "structlog>=25,<26", + # OpenTelemetry uses split versioning: the stable API/SDK/exporters track 1.x while the + # instrumentation packages track 0.Nb pre-releases, and both are released together (the 1.4x + # line pairs with 0.63b). The differing pin styles are intentional, not a mismatch. "opentelemetry-sdk~=1.41", "opentelemetry-exporter-otlp-proto-http~=1.41", "opentelemetry-instrumentation-asgi~=0.63b1", diff --git a/pylon_service/tests/test_otel_config.py b/pylon_service/tests/test_otel_config.py index 67bc9edf..a593721a 100644 --- a/pylon_service/tests/test_otel_config.py +++ b/pylon_service/tests/test_otel_config.py @@ -10,6 +10,7 @@ ("endpoint", "expected"), [ pytest.param("", False, id="empty_endpoint_disabled"), + pytest.param(" ", False, id="whitespace_only_disabled"), pytest.param("http://alloy:4318", True, id="set_endpoint_enabled"), ], ) @@ -43,6 +44,8 @@ def test_init_otel_installs_provider_and_instruments_when_enabled(): patch("pylon_service.otel_config.otel_settings", OtelSettings(collector_endpoint="http://alloy:4318")), patch("pylon_service.otel_config.db_engine", db_engine), patch("pylon_service.otel_config.trace.set_tracer_provider") as set_provider, + patch("pylon_service.otel_config.TracerProvider"), + patch("pylon_service.otel_config.BatchSpanProcessor"), patch("pylon_service.otel_config.OTLPSpanExporter") as exporter, patch("pylon_service.otel_config.HTTPXClientInstrumentor") as httpx_instrumentor, patch("pylon_service.otel_config.AioHttpClientInstrumentor") as aiohttp_instrumentor, @@ -55,8 +58,61 @@ def test_init_otel_installs_provider_and_instruments_when_enabled(): exporter.assert_called_once_with(endpoint="http://alloy:4318/v1/traces") httpx_instrumentor.return_value.instrument.assert_called_once_with() aiohttp_instrumentor.return_value.instrument.assert_called_once_with() - asyncio_instrumentor.return_value.instrument.assert_called_once_with() sqlalchemy_instrumentor.return_value.instrument.assert_called_once_with(engine=sync_engine) + asyncio_instrumentor.return_value.instrument.assert_not_called() + + +@pytest.mark.parametrize( + ("instrument_asyncio", "should_instrument"), + [ + pytest.param(True, True, id="asyncio_opt_in_instrumented"), + pytest.param(False, False, id="asyncio_default_off_not_instrumented"), + ], +) +def test_init_otel_instruments_asyncio_only_when_opted_in(instrument_asyncio, should_instrument): + settings = OtelSettings(collector_endpoint="http://alloy:4318", instrument_asyncio=instrument_asyncio) + + with ( + patch("pylon_service.otel_config.otel_settings", settings), + patch("pylon_service.otel_config.db_engine", MagicMock()), + patch("pylon_service.otel_config.trace.set_tracer_provider"), + patch("pylon_service.otel_config.TracerProvider"), + patch("pylon_service.otel_config.BatchSpanProcessor"), + patch("pylon_service.otel_config.OTLPSpanExporter"), + patch("pylon_service.otel_config.HTTPXClientInstrumentor"), + patch("pylon_service.otel_config.AioHttpClientInstrumentor"), + patch("pylon_service.otel_config.AsyncioInstrumentor") as asyncio_instrumentor, + patch("pylon_service.otel_config.SQLAlchemyInstrumentor"), + ): + init_otel() + + assert asyncio_instrumentor.return_value.instrument.called is should_instrument + + +@pytest.mark.parametrize( + ("collector_endpoint", "expected_exporter_endpoint"), + [ + pytest.param("http://alloy:4318", "http://alloy:4318/v1/traces", id="bare_endpoint"), + pytest.param("http://alloy:4318/", "http://alloy:4318/v1/traces", id="trailing_slash"), + pytest.param(" http://alloy:4318/ ", "http://alloy:4318/v1/traces", id="whitespace_and_slash"), + ], +) +def test_init_otel_normalizes_collector_endpoint(collector_endpoint, expected_exporter_endpoint): + with ( + patch("pylon_service.otel_config.otel_settings", OtelSettings(collector_endpoint=collector_endpoint)), + patch("pylon_service.otel_config.db_engine", MagicMock()), + patch("pylon_service.otel_config.trace.set_tracer_provider"), + patch("pylon_service.otel_config.TracerProvider"), + patch("pylon_service.otel_config.BatchSpanProcessor"), + patch("pylon_service.otel_config.OTLPSpanExporter") as exporter, + patch("pylon_service.otel_config.HTTPXClientInstrumentor"), + patch("pylon_service.otel_config.AioHttpClientInstrumentor"), + patch("pylon_service.otel_config.AsyncioInstrumentor"), + patch("pylon_service.otel_config.SQLAlchemyInstrumentor"), + ): + init_otel() + + exporter.assert_called_once_with(endpoint=expected_exporter_endpoint) @pytest.mark.parametrize( diff --git a/pylon_service/tests/test_uvicorn_entrypoint.py b/pylon_service/tests/test_uvicorn_entrypoint.py new file mode 100644 index 00000000..eca3f374 --- /dev/null +++ b/pylon_service/tests/test_uvicorn_entrypoint.py @@ -0,0 +1,62 @@ +from unittest.mock import patch + +import pytest + +from pylon_service.uvicorn_entrypoint import main + + +@pytest.mark.parametrize( + "argv", + [ + pytest.param(["--workers", "4"], id="workers_space_separated"), + pytest.param(["--workers=4"], id="workers_equals"), + ], +) +def test_main_rejects_workers_argument(argv): + with ( + patch("pylon_service.uvicorn_entrypoint.sys.argv", ["pylon-service", *argv]), + patch("pylon_service.uvicorn_entrypoint.uvicorn_main") as uvicorn_main, + pytest.raises(RuntimeError, match="--workers is not supported"), + ): + main() + + uvicorn_main.assert_not_called() + + +@pytest.mark.parametrize( + "web_concurrency", + [ + pytest.param("4", id="multiple_workers"), + pytest.param("2", id="two_workers"), + pytest.param("0", id="zero_workers"), + ], +) +def test_main_rejects_web_concurrency_above_one(web_concurrency): + with ( + patch("pylon_service.uvicorn_entrypoint.sys.argv", ["pylon-service"]), + patch.dict("pylon_service.uvicorn_entrypoint.os.environ", {"WEB_CONCURRENCY": web_concurrency}), + patch("pylon_service.uvicorn_entrypoint.uvicorn_main") as uvicorn_main, + pytest.raises(RuntimeError, match="WEB_CONCURRENCY"), + ): + main() + + uvicorn_main.assert_not_called() + + +@pytest.mark.parametrize( + "environ", + [ + pytest.param({}, id="web_concurrency_unset"), + pytest.param({"WEB_CONCURRENCY": "1"}, id="web_concurrency_one"), + pytest.param({"WEB_CONCURRENCY": " 1 "}, id="web_concurrency_one_padded"), + ], +) +def test_main_starts_uvicorn_for_single_process(environ): + with ( + patch("pylon_service.uvicorn_entrypoint.sys.argv", ["pylon-service"]), + patch.dict("pylon_service.uvicorn_entrypoint.os.environ", environ, clear=True), + patch("pylon_service.uvicorn_entrypoint.uvicorn_main") as uvicorn_main, + ): + main() + + uvicorn_main.assert_called_once() From 2535290e6c17e28bfd516c6069e801ebbc07a9d8 Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Fri, 19 Jun 2026 16:04:14 +0200 Subject: [PATCH 04/12] Remove asyncio instrumentation --- pylon_service/README.md | 2 -- .../pylon_service/envs/test_env.template | 4 --- pylon_service/pylon_service/otel_config.py | 7 +--- pylon_service/pylon_service/settings.py | 1 - pylon_service/pyproject.toml | 1 - pylon_service/tests/test_otel_config.py | 32 ------------------- pylon_service/uv.lock | 17 ---------- 7 files changed, 1 insertion(+), 63 deletions(-) diff --git a/pylon_service/README.md b/pylon_service/README.md index 781cc420..aee2f4c2 100644 --- a/pylon_service/README.md +++ b/pylon_service/README.md @@ -485,8 +485,6 @@ by setting an endpoint/DSN — no separate feature flag. - Outgoing HTTP URLs are recorded on spans verbatim, so do **not** embed credentials in the configured RPC URLs (`PYLON_EVM_RPC_URL`, `PYLON_EVM_ARCHIVE_RPC_URL`, or an `http(s)://` `PYLON_BITTENSOR_NETWORK`) — see the warning in `pylon_commons/settings.py`. The same URLs also appear in debug logs and the Prometheus `rpc_url` metric label. -- `asyncio` instrumentation is opt-in via `PYLON_OTEL_INSTRUMENT_ASYNCIO=true` (default off): it patches hot asyncio - primitives but emits no spans without further `OTEL_PYTHON_ASYNCIO_*` configuration, so it stays off by default. - **Not traced:** the default Bittensor chain transport in `turbobt` is websockets, for which no OpenTelemetry instrumentation exists — so chain RPC calls are not auto-traced. They are covered only when `PYLON_BITTENSOR_NETWORK` points at an `http(s)://` URI, where `turbobt` falls back to `httpx`. diff --git a/pylon_service/pylon_service/envs/test_env.template b/pylon_service/pylon_service/envs/test_env.template index d90fe484..99a6a9c4 100644 --- a/pylon_service/pylon_service/envs/test_env.template +++ b/pylon_service/pylon_service/envs/test_env.template @@ -44,10 +44,6 @@ PYLON_OTEL_SERVICE_VERSION= # and SQLAlchemy libraries are auto-instrumented. You must run your own OTLP collector at this address. PYLON_OTEL_COLLECTOR_ENDPOINT= -# Opt in to asyncio instrumentation (default false). It patches hot asyncio primitives but emits no spans -# without further OTEL_PYTHON_ASYNCIO_* configuration, so it is kept off by default. -# PYLON_OTEL_INSTRUMENT_ASYNCIO=false - # Sentry Configuration # DSN address for pylon to use - leave empty to disable sentry integration. PYLON_SENTRY_DSN= diff --git a/pylon_service/pylon_service/otel_config.py b/pylon_service/pylon_service/otel_config.py index b190a13f..bf64b1fb 100644 --- a/pylon_service/pylon_service/otel_config.py +++ b/pylon_service/pylon_service/otel_config.py @@ -1,7 +1,6 @@ from opentelemetry import trace from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor -from opentelemetry.instrumentation.asyncio import AsyncioInstrumentor from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor from opentelemetry.sdk.resources import Resource @@ -17,9 +16,7 @@ def init_otel() -> None: Initialize OpenTelemetry tracing if a traces endpoint is configured. Sets up a TracerProvider exporting via OTLP HTTP/protobuf and auto-instruments the httpx, - aiohttp, and SQLAlchemy libraries. asyncio instrumentation is opt-in via - ``PYLON_OTEL_INSTRUMENT_ASYNCIO`` because it patches hot asyncio primitives yet emits no spans - without further configuration. A no-op when no endpoint is set. + aiohttp, and SQLAlchemy libraries. A no-op when no endpoint is set. """ if not otel_settings.traces_enabled: return @@ -34,5 +31,3 @@ def init_otel() -> None: HTTPXClientInstrumentor().instrument() AioHttpClientInstrumentor().instrument() SQLAlchemyInstrumentor().instrument(engine=db_engine.sync_engine) - if otel_settings.instrument_asyncio: - AsyncioInstrumentor().instrument() diff --git a/pylon_service/pylon_service/settings.py b/pylon_service/pylon_service/settings.py index df4e4a4d..91718a7a 100644 --- a/pylon_service/pylon_service/settings.py +++ b/pylon_service/pylon_service/settings.py @@ -83,7 +83,6 @@ class OtelSettings(BaseSettings): service_instance_id: str = _DEFAULT_SERVICE_INSTANCE_ID service_version: str = "" collector_endpoint: str = "" - instrument_asyncio: bool = False model_config = SettingsConfigDict( env_file=ENV_FILE, diff --git a/pylon_service/pyproject.toml b/pylon_service/pyproject.toml index 5373dc32..03df74a2 100644 --- a/pylon_service/pyproject.toml +++ b/pylon_service/pyproject.toml @@ -27,7 +27,6 @@ dependencies = [ "opentelemetry-instrumentation-httpx~=0.63b1", "opentelemetry-instrumentation-aiohttp-client~=0.63b1", "opentelemetry-instrumentation-sqlalchemy~=0.63b1", - "opentelemetry-instrumentation-asyncio~=0.63b1", ] [dependency-groups] diff --git a/pylon_service/tests/test_otel_config.py b/pylon_service/tests/test_otel_config.py index a593721a..820ccfa7 100644 --- a/pylon_service/tests/test_otel_config.py +++ b/pylon_service/tests/test_otel_config.py @@ -24,7 +24,6 @@ def test_init_otel_is_noop_when_disabled(): patch("pylon_service.otel_config.trace.set_tracer_provider") as set_provider, patch("pylon_service.otel_config.HTTPXClientInstrumentor") as httpx_instrumentor, patch("pylon_service.otel_config.AioHttpClientInstrumentor") as aiohttp_instrumentor, - patch("pylon_service.otel_config.AsyncioInstrumentor") as asyncio_instrumentor, patch("pylon_service.otel_config.SQLAlchemyInstrumentor") as sqlalchemy_instrumentor, ): init_otel() @@ -32,7 +31,6 @@ def test_init_otel_is_noop_when_disabled(): set_provider.assert_not_called() httpx_instrumentor.assert_not_called() aiohttp_instrumentor.assert_not_called() - asyncio_instrumentor.assert_not_called() sqlalchemy_instrumentor.assert_not_called() @@ -49,7 +47,6 @@ def test_init_otel_installs_provider_and_instruments_when_enabled(): patch("pylon_service.otel_config.OTLPSpanExporter") as exporter, patch("pylon_service.otel_config.HTTPXClientInstrumentor") as httpx_instrumentor, patch("pylon_service.otel_config.AioHttpClientInstrumentor") as aiohttp_instrumentor, - patch("pylon_service.otel_config.AsyncioInstrumentor") as asyncio_instrumentor, patch("pylon_service.otel_config.SQLAlchemyInstrumentor") as sqlalchemy_instrumentor, ): init_otel() @@ -59,34 +56,6 @@ def test_init_otel_installs_provider_and_instruments_when_enabled(): httpx_instrumentor.return_value.instrument.assert_called_once_with() aiohttp_instrumentor.return_value.instrument.assert_called_once_with() sqlalchemy_instrumentor.return_value.instrument.assert_called_once_with(engine=sync_engine) - asyncio_instrumentor.return_value.instrument.assert_not_called() - - -@pytest.mark.parametrize( - ("instrument_asyncio", "should_instrument"), - [ - pytest.param(True, True, id="asyncio_opt_in_instrumented"), - pytest.param(False, False, id="asyncio_default_off_not_instrumented"), - ], -) -def test_init_otel_instruments_asyncio_only_when_opted_in(instrument_asyncio, should_instrument): - settings = OtelSettings(collector_endpoint="http://alloy:4318", instrument_asyncio=instrument_asyncio) - - with ( - patch("pylon_service.otel_config.otel_settings", settings), - patch("pylon_service.otel_config.db_engine", MagicMock()), - patch("pylon_service.otel_config.trace.set_tracer_provider"), - patch("pylon_service.otel_config.TracerProvider"), - patch("pylon_service.otel_config.BatchSpanProcessor"), - patch("pylon_service.otel_config.OTLPSpanExporter"), - patch("pylon_service.otel_config.HTTPXClientInstrumentor"), - patch("pylon_service.otel_config.AioHttpClientInstrumentor"), - patch("pylon_service.otel_config.AsyncioInstrumentor") as asyncio_instrumentor, - patch("pylon_service.otel_config.SQLAlchemyInstrumentor"), - ): - init_otel() - - assert asyncio_instrumentor.return_value.instrument.called is should_instrument @pytest.mark.parametrize( @@ -107,7 +76,6 @@ def test_init_otel_normalizes_collector_endpoint(collector_endpoint, expected_ex patch("pylon_service.otel_config.OTLPSpanExporter") as exporter, patch("pylon_service.otel_config.HTTPXClientInstrumentor"), patch("pylon_service.otel_config.AioHttpClientInstrumentor"), - patch("pylon_service.otel_config.AsyncioInstrumentor"), patch("pylon_service.otel_config.SQLAlchemyInstrumentor"), ): init_otel() diff --git a/pylon_service/uv.lock b/pylon_service/uv.lock index 9dd644a7..ee06d229 100644 --- a/pylon_service/uv.lock +++ b/pylon_service/uv.lock @@ -387,7 +387,6 @@ dependencies = [ { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-instrumentation-aiohttp-client" }, { name = "opentelemetry-instrumentation-asgi" }, - { name = "opentelemetry-instrumentation-asyncio" }, { name = "opentelemetry-instrumentation-httpx" }, { name = "opentelemetry-instrumentation-sqlalchemy" }, { name = "opentelemetry-sdk" }, @@ -435,7 +434,6 @@ requires-dist = [ { name = "opentelemetry-exporter-otlp-proto-http", specifier = "~=1.41" }, { name = "opentelemetry-instrumentation-aiohttp-client", specifier = "~=0.63b1" }, { name = "opentelemetry-instrumentation-asgi", specifier = "~=0.63b1" }, - { name = "opentelemetry-instrumentation-asyncio", specifier = "~=0.63b1" }, { name = "opentelemetry-instrumentation-httpx", specifier = "~=0.63b1" }, { name = "opentelemetry-instrumentation-sqlalchemy", specifier = "~=0.63b1" }, { name = "opentelemetry-sdk", specifier = "~=1.41" }, @@ -1782,21 +1780,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/7e/83986f27b421de04fab1e1a84e892621dac42e6432a9c66779505f4d1381/opentelemetry_instrumentation_asgi-0.63b1-py3-none-any.whl", hash = "sha256:1a22453dfa965f14799b10a674b8acbcb897a8a75c79136060af54214cc7886e", size = 15906, upload-time = "2026-05-21T16:35:04.162Z" }, ] -[[package]] -name = "opentelemetry-instrumentation-asyncio" -version = "0.63b1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-instrumentation" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "wrapt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b2/1a/206fcf577eee1a1e88b37a01d837e25fcad27af3514d4697b32a0f04c425/opentelemetry_instrumentation_asyncio-0.63b1.tar.gz", hash = "sha256:0ae623583dcbe0ae17d63c995906d02a64213652ff180875ace680d1e6c286ec", size = 13942, upload-time = "2026-05-21T16:36:20.271Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/28/25/c98e90c803b3f9bcf1c1e8b1284d022f2894d45f2d350f8c64f75209523c/opentelemetry_instrumentation_asyncio-0.63b1-py3-none-any.whl", hash = "sha256:0baa80c9314569fbb868e07f0b8136da367203c2942df5813c184bde2537f44a", size = 13098, upload-time = "2026-05-21T16:35:06.83Z" }, -] - [[package]] name = "opentelemetry-instrumentation-httpx" version = "0.63b1" From a23ceec1116c3499fda7187ab2fe1cd08b2433b5 Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 09:32:03 +0200 Subject: [PATCH 05/12] Span links --- .../pylon_service/api/_unstable/tasks.py | 46 +++- .../tests/unit/test_apply_weights_tracing.py | 214 ++++++++++++++++++ 2 files changed, 255 insertions(+), 5 deletions(-) create mode 100644 pylon_service/tests/unit/test_apply_weights_tracing.py diff --git a/pylon_service/pylon_service/api/_unstable/tasks.py b/pylon_service/pylon_service/api/_unstable/tasks.py index dfec0e99..155cc7ae 100644 --- a/pylon_service/pylon_service/api/_unstable/tasks.py +++ b/pylon_service/pylon_service/api/_unstable/tasks.py @@ -3,6 +3,10 @@ from abc import ABC, abstractmethod from typing import Any, ClassVar, TypeVar +from opentelemetry import trace +from opentelemetry.context import Context +from opentelemetry.trace import Link, SpanContext +from opentelemetry.util.types import AttributeValue from prometheus_client import Histogram from pylon_commons.models import Block, CommitReveal from pylon_commons.types import ( @@ -46,6 +50,7 @@ from pylon_service.settings import settings logger = logging.getLogger(__name__) +_tracer = trace.get_tracer(__name__) class StopRetrying(Exception): @@ -79,10 +84,14 @@ def __init_subclass__( def __init__(self) -> None: self._running_task: asyncio.Task[ReturnT] | None = None + self._request_span_context: SpanContext | None = None async def schedule(self) -> asyncio.Task[ReturnT]: await self._on_task_scheduled() + request_span_context = trace.get_current_span().get_span_context() + self._request_span_context = request_span_context if request_span_context.is_valid else None + self._running_task = asyncio.create_task(self(), name=self.JOB_NAME) type(self).tasks_running.add(self) self._running_task.add_done_callback(self._on_task_done) @@ -93,13 +102,28 @@ async def __call__(self) -> ReturnT: async def _submit_with_retries(self) -> ReturnT: prepared = False + previous_attempt_context: SpanContext | None = None + attempt_number = 0 async def attempt() -> ReturnT: - nonlocal prepared - if not prepared: - await self._prepare() - prepared = True - return await self._single_attempt() + nonlocal prepared, previous_attempt_context, attempt_number + attempt_number += 1 + links: list[Link] = [] + if self._request_span_context is not None: + links.append(Link(self._request_span_context, attributes={"link.type": "originating_request"})) + if previous_attempt_context is not None and previous_attempt_context.is_valid: + links.append(Link(previous_attempt_context, attributes={"link.type": "previous_attempt"})) + with _tracer.start_as_current_span( + f"{self.JOB_NAME}.attempt", + context=Context(), + links=links, + attributes={"attempt_number": attempt_number, **self._attempt_span_attributes()}, + ) as span: + previous_attempt_context = span.get_span_context() + if not prepared: + await self._prepare() + prepared = True + return await self._single_attempt() retrying = AsyncRetrying( stop=stop_after_attempt(self._retry_attempts + 1), @@ -150,6 +174,12 @@ def _on_task_done(self, task: asyncio.Task[ReturnT]) -> None: else: logger.info("Task %s (%s) finished successfully.", task, self.JOB_NAME) + def _attempt_span_attributes(self) -> dict[str, AttributeValue]: + """ + Per-task attributes attached to each attempt span; overridden by subclasses that have them. + """ + return {} + async def _on_task_scheduled(self) -> None: pass @@ -214,6 +244,12 @@ def _retry_attempts(self) -> int: def _retry_delay_seconds(self) -> int: return settings.weights_retry_delay_seconds + def _attempt_span_attributes(self) -> dict[str, AttributeValue]: + """ + Attach netuid and hotkey to each attempt span for trace filtering. + """ + return {"netuid": self._netuid, "hotkey": self._hotkey} + async def _on_task_scheduled(self) -> None: if self._is_rescheduled: return diff --git a/pylon_service/tests/unit/test_apply_weights_tracing.py b/pylon_service/tests/unit/test_apply_weights_tracing.py new file mode 100644 index 00000000..a1b67164 --- /dev/null +++ b/pylon_service/tests/unit/test_apply_weights_tracing.py @@ -0,0 +1,214 @@ +""" +Tests that the ApplyWeights background task emits one fresh trace per retry attempt, linked to the +originating request span and to the previous attempt (the causal chain), so long-running jobs never +extend a single trace across the whole run. +""" + +from contextlib import asynccontextmanager + +import pytest +from litestar.testing import AsyncTestClient +from opentelemetry.sdk.trace import ReadableSpan, TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from pylon_commons.models import Block, CommitReveal, SubnetHyperparams +from pylon_commons.types import BlockHash, BlockNumber, Hotkey, IdentityName, MechanismId, NetUid, Weight + +from pylon_service.db.models import TaskStatus, WeightTask +from tests.helpers import wait_for_apply_weights_tasks +from tests.integration.localchain.dev_accounts import DevAccount + +ATTEMPT_SPAN_NAME = "apply_weights.attempt" + + +@pytest.fixture +def started_test_app_factory(test_app): + @asynccontextmanager + async def _factory(): + async with AsyncTestClient(app=test_app) as client: + yield client + + return _factory + + +@pytest.fixture +def in_memory_spans(monkeypatch): + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + monkeypatch.setattr("pylon_service.api._unstable.tasks._tracer", tracer) + return exporter, tracer + + +def _attempt_spans(exporter: InMemorySpanExporter) -> list[ReadableSpan]: + return [span for span in exporter.get_finished_spans() if span.name == ATTEMPT_SPAN_NAME] + + +def _trace_id(span: ReadableSpan) -> int: + assert span.context is not None + return span.context.trace_id + + +def _attributes(span: ReadableSpan) -> dict[str, object]: + assert span.attributes is not None + return dict(span.attributes) + + +def _link_topology(span: ReadableSpan) -> list[tuple[int, object]]: + return sorted((link.context.trace_id, (link.attributes or {})["link.type"]) for link in span.links) + + +@pytest.fixture +def force_three_attempts(monkeypatch): + monkeypatch.setattr("pylon_service.settings.settings.weights_retry_attempts", 3) + monkeypatch.setattr("pylon_service.settings.settings.weights_retry_delay_seconds", 0) + + +_RETRY_BEHAVIOR = { + "get_latest_block": [ + RuntimeError("Network error"), + RuntimeError("Network error"), + Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), + Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), + Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), + Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), + Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), + ], + "get_hyperparams": [ + SubnetHyperparams(commit_reveal_weights_enabled=CommitReveal.DISABLED), + SubnetHyperparams(commit_reveal_weights_enabled=CommitReveal.DISABLED), + ], + "set_weights": [None], +} + + +@pytest.mark.asyncio +async def test_each_attempt_is_a_fresh_trace_linked_to_request_and_previous_attempt( + identity_test_client_factory, + mock_bt_client_factory, + force_three_attempts, + in_memory_spans, +): + """ + Three attempts produce three distinct traces, none equal to the request trace, each linked to the + request and (from the second on) to the previous attempt. + """ + exporter, tracer = in_memory_spans + async with mock_bt_client_factory("sn1") as mock_client: + async with mock_client.mock_behavior(**_RETRY_BEHAVIOR): + async with identity_test_client_factory("sn1") as client: + with tracer.start_as_current_span("incoming_request") as request_span: + request_trace_id = request_span.get_span_context().trace_id + response = await client.put( + "/api/_unstable/identity/sn1/subnet/1/mechanism/1/weights", + json={"weights": {"hotkey1": 0.7, "hotkey2": 0.3}}, + ) + + assert response.status_code == 200, response.content + + await wait_for_apply_weights_tasks() + + attempts = _attempt_spans(exporter) + attempt_trace_ids = [_trace_id(span) for span in attempts] + + assert (len(attempt_trace_ids), len(set(attempt_trace_ids)), request_trace_id in attempt_trace_ids) == ( + 3, + 3, + False, + ) + assert [_link_topology(span) for span in attempts] == [ + sorted([(request_trace_id, "originating_request")]), + sorted([(request_trace_id, "originating_request"), (attempt_trace_ids[0], "previous_attempt")]), + sorted([(request_trace_id, "originating_request"), (attempt_trace_ids[1], "previous_attempt")]), + ] + + +@pytest.mark.asyncio +async def test_attempt_spans_carry_attempt_number_and_task_attributes( + identity_test_client_factory, + mock_bt_client_factory, + force_three_attempts, + in_memory_spans, +): + """ + Each attempt span carries its attempt number and the task's netuid and hotkey. + """ + exporter, _ = in_memory_spans + async with mock_bt_client_factory("sn1") as mock_client: + async with mock_client.mock_behavior(**_RETRY_BEHAVIOR): + async with identity_test_client_factory("sn1") as client: + response = await client.put( + "/api/_unstable/identity/sn1/subnet/1/mechanism/1/weights", + json={"weights": {"hotkey1": 0.7, "hotkey2": 0.3}}, + ) + + assert response.status_code == 200, response.content + + await wait_for_apply_weights_tasks() + + attempts = _attempt_spans(exporter) + + assert [_attributes(span) for span in attempts] == [ + {"attempt_number": 1, "netuid": 1, "hotkey": DevAccount.ALICE.hotkey_ss58}, + {"attempt_number": 2, "netuid": 1, "hotkey": DevAccount.ALICE.hotkey_ss58}, + {"attempt_number": 3, "netuid": 1, "hotkey": DevAccount.ALICE.hotkey_ss58}, + ] + + +@pytest.mark.asyncio +async def test_rescheduled_task_attempt_has_no_links( + seed_running_weight_task_before_reschedule, + started_test_app_factory, + mock_bt_client_factory, + in_memory_spans, +): + """ + A task rescheduled on startup has no originating request, so its attempt span carries no links. + """ + exporter, _ = in_memory_spans + weight_task = WeightTask( + identity_name=IdentityName("sn1"), + netuid=NetUid(1), + hotkey=DevAccount.ALICE.hotkey_ss58, + mechanism_id=MechanismId(0), + weights={Hotkey("hotkey1"): Weight(0.5)}, + status=TaskStatus.RUNNING, + start_block_number=BlockNumber(1000), + ) + seed_running_weight_task_before_reschedule.append(weight_task) + + async with mock_bt_client_factory("sn1") as mock_client: + async with mock_client.mock_behavior(): + async with started_test_app_factory(): + await wait_for_apply_weights_tasks() + + attempts = _attempt_spans(exporter) + + assert [len(span.links) for span in attempts] == [0] + + +@pytest.mark.asyncio +async def test_no_spans_exported_when_tracing_disabled( + identity_test_client_factory, + mock_bt_client_factory, + force_three_attempts, +): + """ + Without a configured tracer the no-op proxy records nothing, so no attempt spans are exported. + """ + exporter = InMemorySpanExporter() + + async with mock_bt_client_factory("sn1") as mock_client: + async with mock_client.mock_behavior(**_RETRY_BEHAVIOR): + async with identity_test_client_factory("sn1") as client: + response = await client.put( + "/api/_unstable/identity/sn1/subnet/1/mechanism/1/weights", + json={"weights": {"hotkey1": 0.7, "hotkey2": 0.3}}, + ) + + assert response.status_code == 200, response.content + + await wait_for_apply_weights_tasks() + + assert exporter.get_finished_spans() == () From 10780dfc54a4ce3377a267e075a1dfa4ed6ac700 Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 12:57:31 +0200 Subject: [PATCH 06/12] test: remove vacuous test_no_spans_exported_when_tracing_disabled The test created a local InMemorySpanExporter that was never wired to any TracerProvider or the module _tracer, so its assertion was always true and verified nothing. A regression of spans leaking to the global provider would have passed unnoticed. --- .../tests/unit/test_apply_weights_tracing.py | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/pylon_service/tests/unit/test_apply_weights_tracing.py b/pylon_service/tests/unit/test_apply_weights_tracing.py index a1b67164..003bd238 100644 --- a/pylon_service/tests/unit/test_apply_weights_tracing.py +++ b/pylon_service/tests/unit/test_apply_weights_tracing.py @@ -186,29 +186,3 @@ async def test_rescheduled_task_attempt_has_no_links( attempts = _attempt_spans(exporter) assert [len(span.links) for span in attempts] == [0] - - -@pytest.mark.asyncio -async def test_no_spans_exported_when_tracing_disabled( - identity_test_client_factory, - mock_bt_client_factory, - force_three_attempts, -): - """ - Without a configured tracer the no-op proxy records nothing, so no attempt spans are exported. - """ - exporter = InMemorySpanExporter() - - async with mock_bt_client_factory("sn1") as mock_client: - async with mock_client.mock_behavior(**_RETRY_BEHAVIOR): - async with identity_test_client_factory("sn1") as client: - response = await client.put( - "/api/_unstable/identity/sn1/subnet/1/mechanism/1/weights", - json={"weights": {"hotkey1": 0.7, "hotkey2": 0.3}}, - ) - - assert response.status_code == 200, response.content - - await wait_for_apply_weights_tasks() - - assert exporter.get_finished_spans() == () From 3a4774be03fdfe3138f519837cfda621eb36d45e Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 13:06:17 +0200 Subject: [PATCH 07/12] docs: document Tempo max_trace_live requirement for long submission spans --- pylon_service/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pylon_service/README.md b/pylon_service/README.md index aee2f4c2..8660e651 100644 --- a/pylon_service/README.md +++ b/pylon_service/README.md @@ -490,6 +490,13 @@ by setting an endpoint/DSN — no separate feature flag. `PYLON_BITTENSOR_NETWORK` points at an `http(s)://` URI, where `turbobt` falls back to `httpx`. - The service does not ship a collector. Running and configuring Alloy (or any OTLP collector) at the configured endpoint, including any tail-sampling or endpoint filtering, is the deployer's responsibility. +- **Long-lived on-chain submission spans:** background submission tasks (`apply_weights`, `set_commitment`, + `set_revealed_commitment`) emit one short, self-contained span per retry attempt, each with its own `trace_id` and + span links back to the originating request and the previous attempt — this keeps traces short across the (up to 200) + retries. A *single* attempt can still take up to 120s while waiting for extrinsic finalization (~12s per block, longer + under congestion). Backends that bound trace lifetime — notably Tempo's `max_trace_live` (default 30s) — will split + such an attempt's trace. If you use Tempo, set `max_trace_live` to at least 180s (a margin above the 120s submission + timeout) and `max_trace_idle` to at least 30s. - Traces require the service to run as a single uvicorn process; both `--workers` and `WEB_CONCURRENCY` (other than `1`) are rejected (see `uvicorn_entrypoint.py`) because the SDK is initialised once at import and would not survive `fork()`. From fea43ed731a22319f02e7e80dc08ccd8714795e4 Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 13:11:10 +0200 Subject: [PATCH 08/12] refactor: derive attempt span number from tenacity retry state Drop the duplicate nonlocal attempt_number counter in _submit_with_retries and read retrying.statistics["attempt_number"] instead, so the attempt span and the retry log share tenacity's single source of truth. --- pylon_service/pylon_service/api/_unstable/tasks.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pylon_service/pylon_service/api/_unstable/tasks.py b/pylon_service/pylon_service/api/_unstable/tasks.py index 155cc7ae..753a783b 100644 --- a/pylon_service/pylon_service/api/_unstable/tasks.py +++ b/pylon_service/pylon_service/api/_unstable/tasks.py @@ -103,11 +103,9 @@ async def __call__(self) -> ReturnT: async def _submit_with_retries(self) -> ReturnT: prepared = False previous_attempt_context: SpanContext | None = None - attempt_number = 0 async def attempt() -> ReturnT: - nonlocal prepared, previous_attempt_context, attempt_number - attempt_number += 1 + nonlocal prepared, previous_attempt_context links: list[Link] = [] if self._request_span_context is not None: links.append(Link(self._request_span_context, attributes={"link.type": "originating_request"})) @@ -117,7 +115,10 @@ async def attempt() -> ReturnT: f"{self.JOB_NAME}.attempt", context=Context(), links=links, - attributes={"attempt_number": attempt_number, **self._attempt_span_attributes()}, + attributes={ + "attempt_number": retrying.statistics["attempt_number"], + **self._attempt_span_attributes(), + }, ) as span: previous_attempt_context = span.get_span_context() if not prepared: From b5a469d0cc2c72fea4688c0fce01cff253b2cd78 Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 13:46:24 +0200 Subject: [PATCH 09/12] refactor: centralize span-link constants and span-context helper Introduce pylon_service/tracing.py with a TraceLinkType StrEnum (link type members plus the link.type attribute key via enum.nonmember) and a get_current_valid_span_context() helper. Use them in the ApplyWeights/SetCommitment retry loop and the structlog OTEL processor, replacing the duplicated 'fetch current span context, drop if invalid' idiom and the magic link-attribute strings. Both attempt links are now normalized at write time and built symmetrically. The tracing test consumes the same enum, so a typo on either side breaks the test instead of passing silently. --- .../pylon_service/api/_unstable/tasks.py | 24 ++++++++++++------ pylon_service/pylon_service/logging.py | 6 ++--- pylon_service/pylon_service/tracing.py | 25 +++++++++++++++++++ .../tests/unit/test_apply_weights_tracing.py | 19 +++++++++++--- 4 files changed, 60 insertions(+), 14 deletions(-) create mode 100644 pylon_service/pylon_service/tracing.py diff --git a/pylon_service/pylon_service/api/_unstable/tasks.py b/pylon_service/pylon_service/api/_unstable/tasks.py index 753a783b..28c241e0 100644 --- a/pylon_service/pylon_service/api/_unstable/tasks.py +++ b/pylon_service/pylon_service/api/_unstable/tasks.py @@ -48,6 +48,7 @@ track_operation, ) from pylon_service.settings import settings +from pylon_service.tracing import TraceLinkType, get_current_valid_span_context logger = logging.getLogger(__name__) _tracer = trace.get_tracer(__name__) @@ -89,8 +90,7 @@ def __init__(self) -> None: async def schedule(self) -> asyncio.Task[ReturnT]: await self._on_task_scheduled() - request_span_context = trace.get_current_span().get_span_context() - self._request_span_context = request_span_context if request_span_context.is_valid else None + self._request_span_context = get_current_valid_span_context() self._running_task = asyncio.create_task(self(), name=self.JOB_NAME) type(self).tasks_running.add(self) @@ -108,9 +108,19 @@ async def attempt() -> ReturnT: nonlocal prepared, previous_attempt_context links: list[Link] = [] if self._request_span_context is not None: - links.append(Link(self._request_span_context, attributes={"link.type": "originating_request"})) - if previous_attempt_context is not None and previous_attempt_context.is_valid: - links.append(Link(previous_attempt_context, attributes={"link.type": "previous_attempt"})) + links.append( + Link( + self._request_span_context, + attributes={TraceLinkType.ATTRIBUTE_KEY: TraceLinkType.ORIGINATING_REQUEST}, + ) + ) + if previous_attempt_context is not None: + links.append( + Link( + previous_attempt_context, + attributes={TraceLinkType.ATTRIBUTE_KEY: TraceLinkType.PREVIOUS_ATTEMPT}, + ) + ) with _tracer.start_as_current_span( f"{self.JOB_NAME}.attempt", context=Context(), @@ -119,8 +129,8 @@ async def attempt() -> ReturnT: "attempt_number": retrying.statistics["attempt_number"], **self._attempt_span_attributes(), }, - ) as span: - previous_attempt_context = span.get_span_context() + ): + previous_attempt_context = get_current_valid_span_context() if not prepared: await self._prepare() prepared = True diff --git a/pylon_service/pylon_service/logging.py b/pylon_service/pylon_service/logging.py index c3a47c50..e0c2f179 100644 --- a/pylon_service/pylon_service/logging.py +++ b/pylon_service/pylon_service/logging.py @@ -6,10 +6,10 @@ import structlog from litestar.logging import LoggingConfig -from opentelemetry import trace from pylon_service.middleware.request_id import current_request_id from pylon_service.settings import otel_settings, settings +from pylon_service.tracing import get_current_valid_span_context if TYPE_CHECKING: from structlog.typing import EventDict, WrappedLogger @@ -59,8 +59,8 @@ def add_otel_context_to_structlog( event_dict: EventDict, ) -> EventDict: """Structlog processor injecting the active span's trace_id and span_id into the log event.""" - ctx = trace.get_current_span().get_span_context() - if ctx.is_valid: + ctx = get_current_valid_span_context() + if ctx is not None: event_dict["trace_id"] = format(ctx.trace_id, "032x") event_dict["span_id"] = format(ctx.span_id, "016x") return event_dict diff --git a/pylon_service/pylon_service/tracing.py b/pylon_service/pylon_service/tracing.py new file mode 100644 index 00000000..6d41d164 --- /dev/null +++ b/pylon_service/pylon_service/tracing.py @@ -0,0 +1,25 @@ +from enum import StrEnum, nonmember + +from opentelemetry import trace +from opentelemetry.trace import SpanContext + + +class TraceLinkType(StrEnum): + """ + Kind of span link attached to a retry attempt span, and the attribute key it is stored under. + """ + + ATTRIBUTE_KEY = nonmember("link.type") + + ORIGINATING_REQUEST = "originating_request" + PREVIOUS_ATTEMPT = "previous_attempt" + + +def get_current_valid_span_context() -> SpanContext | None: + """ + Return the active span's context, or None when there is no valid active span. + + The context is invalid when tracing is disabled or when running outside any span. + """ + ctx = trace.get_current_span().get_span_context() + return ctx if ctx.is_valid else None diff --git a/pylon_service/tests/unit/test_apply_weights_tracing.py b/pylon_service/tests/unit/test_apply_weights_tracing.py index 003bd238..71e66c7c 100644 --- a/pylon_service/tests/unit/test_apply_weights_tracing.py +++ b/pylon_service/tests/unit/test_apply_weights_tracing.py @@ -15,6 +15,7 @@ from pylon_commons.types import BlockHash, BlockNumber, Hotkey, IdentityName, MechanismId, NetUid, Weight from pylon_service.db.models import TaskStatus, WeightTask +from pylon_service.tracing import TraceLinkType from tests.helpers import wait_for_apply_weights_tasks from tests.integration.localchain.dev_accounts import DevAccount @@ -56,7 +57,7 @@ def _attributes(span: ReadableSpan) -> dict[str, object]: def _link_topology(span: ReadableSpan) -> list[tuple[int, object]]: - return sorted((link.context.trace_id, (link.attributes or {})["link.type"]) for link in span.links) + return sorted((link.context.trace_id, (link.attributes or {})[TraceLinkType.ATTRIBUTE_KEY]) for link in span.links) @pytest.fixture @@ -118,9 +119,19 @@ async def test_each_attempt_is_a_fresh_trace_linked_to_request_and_previous_atte False, ) assert [_link_topology(span) for span in attempts] == [ - sorted([(request_trace_id, "originating_request")]), - sorted([(request_trace_id, "originating_request"), (attempt_trace_ids[0], "previous_attempt")]), - sorted([(request_trace_id, "originating_request"), (attempt_trace_ids[1], "previous_attempt")]), + sorted([(request_trace_id, TraceLinkType.ORIGINATING_REQUEST)]), + sorted( + [ + (request_trace_id, TraceLinkType.ORIGINATING_REQUEST), + (attempt_trace_ids[0], TraceLinkType.PREVIOUS_ATTEMPT), + ] + ), + sorted( + [ + (request_trace_id, TraceLinkType.ORIGINATING_REQUEST), + (attempt_trace_ids[1], TraceLinkType.PREVIOUS_ATTEMPT), + ] + ), ] From 11c221bf9e8eb732ea1d96af41502b75f437b55f Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 15:23:13 +0200 Subject: [PATCH 10/12] refactor: extract attempt-span observability into context manager Pull the span-link building, attempt-span opening, and previous-attempt context bookkeeping out of the nested attempt() closure in _submit_with_retries into a dedicated _attempt_span context manager on BackgroundTask, leaving attempt() with only the prepare/single-attempt logic. --- .../pylon_service/api/_unstable/tasks.py | 62 +++++++++++-------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/pylon_service/pylon_service/api/_unstable/tasks.py b/pylon_service/pylon_service/api/_unstable/tasks.py index 28c241e0..4dcef9bf 100644 --- a/pylon_service/pylon_service/api/_unstable/tasks.py +++ b/pylon_service/pylon_service/api/_unstable/tasks.py @@ -1,6 +1,8 @@ import asyncio import logging from abc import ABC, abstractmethod +from collections.abc import Iterator +from contextlib import contextmanager from typing import Any, ClassVar, TypeVar from opentelemetry import trace @@ -86,6 +88,7 @@ def __init_subclass__( def __init__(self) -> None: self._running_task: asyncio.Task[ReturnT] | None = None self._request_span_context: SpanContext | None = None + self._previous_attempt_context: SpanContext | None = None async def schedule(self) -> asyncio.Task[ReturnT]: await self._on_task_scheduled() @@ -100,37 +103,42 @@ async def schedule(self) -> asyncio.Task[ReturnT]: async def __call__(self) -> ReturnT: return await self._submit_with_retries() + @contextmanager + def _attempt_span(self, attempt_number: int) -> Iterator[None]: + links: list[Link] = [] + if self._request_span_context is not None: + links.append( + Link( + self._request_span_context, + attributes={TraceLinkType.ATTRIBUTE_KEY: TraceLinkType.ORIGINATING_REQUEST}, + ) + ) + if self._previous_attempt_context is not None: + links.append( + Link( + self._previous_attempt_context, + attributes={TraceLinkType.ATTRIBUTE_KEY: TraceLinkType.PREVIOUS_ATTEMPT}, + ) + ) + with _tracer.start_as_current_span( + f"{self.JOB_NAME}.attempt", + context=Context(), + links=links, + attributes={ + "attempt_number": attempt_number, + **self._attempt_span_attributes(), + }, + ): + self._previous_attempt_context = get_current_valid_span_context() + yield + async def _submit_with_retries(self) -> ReturnT: prepared = False - previous_attempt_context: SpanContext | None = None + self._previous_attempt_context = None async def attempt() -> ReturnT: - nonlocal prepared, previous_attempt_context - links: list[Link] = [] - if self._request_span_context is not None: - links.append( - Link( - self._request_span_context, - attributes={TraceLinkType.ATTRIBUTE_KEY: TraceLinkType.ORIGINATING_REQUEST}, - ) - ) - if previous_attempt_context is not None: - links.append( - Link( - previous_attempt_context, - attributes={TraceLinkType.ATTRIBUTE_KEY: TraceLinkType.PREVIOUS_ATTEMPT}, - ) - ) - with _tracer.start_as_current_span( - f"{self.JOB_NAME}.attempt", - context=Context(), - links=links, - attributes={ - "attempt_number": retrying.statistics["attempt_number"], - **self._attempt_span_attributes(), - }, - ): - previous_attempt_context = get_current_valid_span_context() + nonlocal prepared + with self._attempt_span(retrying.statistics["attempt_number"]): if not prepared: await self._prepare() prepared = True From 415af701e70bf66fad7a07ed71671bbbbc6e3c5e Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 15:43:59 +0200 Subject: [PATCH 11/12] fix: address tracing review findings (test collection, semconv keys, docs) - move test_otel_config.py and test_uvicorn_entrypoint.py into tests/unit/ so the default `nox -s test` session (collects tests/unit/ only) runs them - use opentelemetry semconv constants for OTEL resource attribute keys instead of hand-typed dotted-string literals - update CLAUDE.md Monitoring section to reflect SentrySettings/OtelSettings split and the PYLON_ENVIRONMENT single source of truth --- CLAUDE.md | 10 ++++++++-- pylon_service/pylon_service/settings.py | 12 +++++++----- pylon_service/tests/{ => unit}/test_otel_config.py | 0 .../tests/{ => unit}/test_uvicorn_entrypoint.py | 0 4 files changed, 15 insertions(+), 7 deletions(-) rename pylon_service/tests/{ => unit}/test_otel_config.py (100%) rename pylon_service/tests/{ => unit}/test_uvicorn_entrypoint.py (100%) diff --git a/CLAUDE.md b/CLAUDE.md index 2af94a6f..18d421e7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -231,8 +231,14 @@ All settings use the `PYLON_` prefix. Example: `PYLON_BITTENSOR_NETWORK=finney` - By default, data is cached for all subnets configured in identities - **Monitoring**: - - `sentry_dsn`: Sentry DSN for error tracking (optional) - - `sentry_environment`: Environment name for Sentry (default: "development") + - `environment` (in `pylon_commons/settings.py`, env `PYLON_ENVIRONMENT`): Deployment environment name; single source of truth for both Sentry and OTEL (default: "production") + - Sentry (`SentrySettings` in `pylon_service/settings.py`, env prefix `PYLON_SENTRY_`): + - `dsn` (`PYLON_SENTRY_DSN`): Sentry DSN for error tracking; empty disables Sentry (default: "") + - `environment` (`PYLON_SENTRY_ENVIRONMENT`): Sentry environment name; falls back to `PYLON_ENVIRONMENT` when left unset + - OpenTelemetry traces (`OtelSettings` in `pylon_service/settings.py`, env prefix `PYLON_OTEL_`): + - `collector_endpoint` (`PYLON_OTEL_COLLECTOR_ENDPOINT`): Base URL of an OTLP collector; empty disables traces (default: "") + - `deployment_environment` (`PYLON_OTEL_DEPLOYMENT_ENVIRONMENT`): OTEL `deployment.environment.name` attribute; falls back to `PYLON_ENVIRONMENT` when left unset + - `service_version` (`PYLON_OTEL_SERVICE_VERSION`): OTEL `service.version` attribute; injected automatically at Docker build time (default: "") - **Development**: - `docker_image_name`: Docker image name (default: "bittensor_pylon") diff --git a/pylon_service/pylon_service/settings.py b/pylon_service/pylon_service/settings.py index 91718a7a..fbc3487a 100644 --- a/pylon_service/pylon_service/settings.py +++ b/pylon_service/pylon_service/settings.py @@ -2,6 +2,8 @@ from typing import Self from litestar.config.response_cache import ResponseCacheConfig +from opentelemetry.semconv._incubating.attributes.deployment_attributes import DEPLOYMENT_ENVIRONMENT_NAME +from opentelemetry.semconv.resource import ResourceAttributes from pydantic import Field, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict from pylon_commons.settings import ENV_FILE, Settings @@ -109,13 +111,13 @@ def traces_enabled(self) -> bool: def resource_attributes(self) -> dict[str, str]: """Return OTEL resource attributes as dotted-key fields for log injection.""" attrs = { - "service.namespace": self.service_namespace, - "service.name": self.service_name, - "deployment.environment.name": self.deployment_environment, - "service.instance.id": self.service_instance_id, + ResourceAttributes.SERVICE_NAMESPACE: self.service_namespace, + ResourceAttributes.SERVICE_NAME: self.service_name, + DEPLOYMENT_ENVIRONMENT_NAME: self.deployment_environment, + ResourceAttributes.SERVICE_INSTANCE_ID: self.service_instance_id, } if self.service_version: - attrs["service.version"] = self.service_version + attrs[ResourceAttributes.SERVICE_VERSION] = self.service_version return attrs diff --git a/pylon_service/tests/test_otel_config.py b/pylon_service/tests/unit/test_otel_config.py similarity index 100% rename from pylon_service/tests/test_otel_config.py rename to pylon_service/tests/unit/test_otel_config.py diff --git a/pylon_service/tests/test_uvicorn_entrypoint.py b/pylon_service/tests/unit/test_uvicorn_entrypoint.py similarity index 100% rename from pylon_service/tests/test_uvicorn_entrypoint.py rename to pylon_service/tests/unit/test_uvicorn_entrypoint.py From fda6b761ed48f5821de14ac5de1f4766868f265d Mon Sep 17 00:00:00 2001 From: Kacper Wolkiewicz Date: Mon, 22 Jun 2026 16:04:54 +0200 Subject: [PATCH 12/12] Removed some unnecessery tests --- .../tests/unit/test_apply_weights_tracing.py | 199 ------------------ .../tests/unit/test_uvicorn_entrypoint.py | 62 ------ 2 files changed, 261 deletions(-) delete mode 100644 pylon_service/tests/unit/test_apply_weights_tracing.py delete mode 100644 pylon_service/tests/unit/test_uvicorn_entrypoint.py diff --git a/pylon_service/tests/unit/test_apply_weights_tracing.py b/pylon_service/tests/unit/test_apply_weights_tracing.py deleted file mode 100644 index 71e66c7c..00000000 --- a/pylon_service/tests/unit/test_apply_weights_tracing.py +++ /dev/null @@ -1,199 +0,0 @@ -""" -Tests that the ApplyWeights background task emits one fresh trace per retry attempt, linked to the -originating request span and to the previous attempt (the causal chain), so long-running jobs never -extend a single trace across the whole run. -""" - -from contextlib import asynccontextmanager - -import pytest -from litestar.testing import AsyncTestClient -from opentelemetry.sdk.trace import ReadableSpan, TracerProvider -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter -from pylon_commons.models import Block, CommitReveal, SubnetHyperparams -from pylon_commons.types import BlockHash, BlockNumber, Hotkey, IdentityName, MechanismId, NetUid, Weight - -from pylon_service.db.models import TaskStatus, WeightTask -from pylon_service.tracing import TraceLinkType -from tests.helpers import wait_for_apply_weights_tasks -from tests.integration.localchain.dev_accounts import DevAccount - -ATTEMPT_SPAN_NAME = "apply_weights.attempt" - - -@pytest.fixture -def started_test_app_factory(test_app): - @asynccontextmanager - async def _factory(): - async with AsyncTestClient(app=test_app) as client: - yield client - - return _factory - - -@pytest.fixture -def in_memory_spans(monkeypatch): - exporter = InMemorySpanExporter() - provider = TracerProvider() - provider.add_span_processor(SimpleSpanProcessor(exporter)) - tracer = provider.get_tracer("test") - monkeypatch.setattr("pylon_service.api._unstable.tasks._tracer", tracer) - return exporter, tracer - - -def _attempt_spans(exporter: InMemorySpanExporter) -> list[ReadableSpan]: - return [span for span in exporter.get_finished_spans() if span.name == ATTEMPT_SPAN_NAME] - - -def _trace_id(span: ReadableSpan) -> int: - assert span.context is not None - return span.context.trace_id - - -def _attributes(span: ReadableSpan) -> dict[str, object]: - assert span.attributes is not None - return dict(span.attributes) - - -def _link_topology(span: ReadableSpan) -> list[tuple[int, object]]: - return sorted((link.context.trace_id, (link.attributes or {})[TraceLinkType.ATTRIBUTE_KEY]) for link in span.links) - - -@pytest.fixture -def force_three_attempts(monkeypatch): - monkeypatch.setattr("pylon_service.settings.settings.weights_retry_attempts", 3) - monkeypatch.setattr("pylon_service.settings.settings.weights_retry_delay_seconds", 0) - - -_RETRY_BEHAVIOR = { - "get_latest_block": [ - RuntimeError("Network error"), - RuntimeError("Network error"), - Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), - Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), - Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), - Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), - Block(number=BlockNumber(1000), hash=BlockHash("0xabc123")), - ], - "get_hyperparams": [ - SubnetHyperparams(commit_reveal_weights_enabled=CommitReveal.DISABLED), - SubnetHyperparams(commit_reveal_weights_enabled=CommitReveal.DISABLED), - ], - "set_weights": [None], -} - - -@pytest.mark.asyncio -async def test_each_attempt_is_a_fresh_trace_linked_to_request_and_previous_attempt( - identity_test_client_factory, - mock_bt_client_factory, - force_three_attempts, - in_memory_spans, -): - """ - Three attempts produce three distinct traces, none equal to the request trace, each linked to the - request and (from the second on) to the previous attempt. - """ - exporter, tracer = in_memory_spans - async with mock_bt_client_factory("sn1") as mock_client: - async with mock_client.mock_behavior(**_RETRY_BEHAVIOR): - async with identity_test_client_factory("sn1") as client: - with tracer.start_as_current_span("incoming_request") as request_span: - request_trace_id = request_span.get_span_context().trace_id - response = await client.put( - "/api/_unstable/identity/sn1/subnet/1/mechanism/1/weights", - json={"weights": {"hotkey1": 0.7, "hotkey2": 0.3}}, - ) - - assert response.status_code == 200, response.content - - await wait_for_apply_weights_tasks() - - attempts = _attempt_spans(exporter) - attempt_trace_ids = [_trace_id(span) for span in attempts] - - assert (len(attempt_trace_ids), len(set(attempt_trace_ids)), request_trace_id in attempt_trace_ids) == ( - 3, - 3, - False, - ) - assert [_link_topology(span) for span in attempts] == [ - sorted([(request_trace_id, TraceLinkType.ORIGINATING_REQUEST)]), - sorted( - [ - (request_trace_id, TraceLinkType.ORIGINATING_REQUEST), - (attempt_trace_ids[0], TraceLinkType.PREVIOUS_ATTEMPT), - ] - ), - sorted( - [ - (request_trace_id, TraceLinkType.ORIGINATING_REQUEST), - (attempt_trace_ids[1], TraceLinkType.PREVIOUS_ATTEMPT), - ] - ), - ] - - -@pytest.mark.asyncio -async def test_attempt_spans_carry_attempt_number_and_task_attributes( - identity_test_client_factory, - mock_bt_client_factory, - force_three_attempts, - in_memory_spans, -): - """ - Each attempt span carries its attempt number and the task's netuid and hotkey. - """ - exporter, _ = in_memory_spans - async with mock_bt_client_factory("sn1") as mock_client: - async with mock_client.mock_behavior(**_RETRY_BEHAVIOR): - async with identity_test_client_factory("sn1") as client: - response = await client.put( - "/api/_unstable/identity/sn1/subnet/1/mechanism/1/weights", - json={"weights": {"hotkey1": 0.7, "hotkey2": 0.3}}, - ) - - assert response.status_code == 200, response.content - - await wait_for_apply_weights_tasks() - - attempts = _attempt_spans(exporter) - - assert [_attributes(span) for span in attempts] == [ - {"attempt_number": 1, "netuid": 1, "hotkey": DevAccount.ALICE.hotkey_ss58}, - {"attempt_number": 2, "netuid": 1, "hotkey": DevAccount.ALICE.hotkey_ss58}, - {"attempt_number": 3, "netuid": 1, "hotkey": DevAccount.ALICE.hotkey_ss58}, - ] - - -@pytest.mark.asyncio -async def test_rescheduled_task_attempt_has_no_links( - seed_running_weight_task_before_reschedule, - started_test_app_factory, - mock_bt_client_factory, - in_memory_spans, -): - """ - A task rescheduled on startup has no originating request, so its attempt span carries no links. - """ - exporter, _ = in_memory_spans - weight_task = WeightTask( - identity_name=IdentityName("sn1"), - netuid=NetUid(1), - hotkey=DevAccount.ALICE.hotkey_ss58, - mechanism_id=MechanismId(0), - weights={Hotkey("hotkey1"): Weight(0.5)}, - status=TaskStatus.RUNNING, - start_block_number=BlockNumber(1000), - ) - seed_running_weight_task_before_reschedule.append(weight_task) - - async with mock_bt_client_factory("sn1") as mock_client: - async with mock_client.mock_behavior(): - async with started_test_app_factory(): - await wait_for_apply_weights_tasks() - - attempts = _attempt_spans(exporter) - - assert [len(span.links) for span in attempts] == [0] diff --git a/pylon_service/tests/unit/test_uvicorn_entrypoint.py b/pylon_service/tests/unit/test_uvicorn_entrypoint.py deleted file mode 100644 index eca3f374..00000000 --- a/pylon_service/tests/unit/test_uvicorn_entrypoint.py +++ /dev/null @@ -1,62 +0,0 @@ -from unittest.mock import patch - -import pytest - -from pylon_service.uvicorn_entrypoint import main - - -@pytest.mark.parametrize( - "argv", - [ - pytest.param(["--workers", "4"], id="workers_space_separated"), - pytest.param(["--workers=4"], id="workers_equals"), - ], -) -def test_main_rejects_workers_argument(argv): - with ( - patch("pylon_service.uvicorn_entrypoint.sys.argv", ["pylon-service", *argv]), - patch("pylon_service.uvicorn_entrypoint.uvicorn_main") as uvicorn_main, - pytest.raises(RuntimeError, match="--workers is not supported"), - ): - main() - - uvicorn_main.assert_not_called() - - -@pytest.mark.parametrize( - "web_concurrency", - [ - pytest.param("4", id="multiple_workers"), - pytest.param("2", id="two_workers"), - pytest.param("0", id="zero_workers"), - ], -) -def test_main_rejects_web_concurrency_above_one(web_concurrency): - with ( - patch("pylon_service.uvicorn_entrypoint.sys.argv", ["pylon-service"]), - patch.dict("pylon_service.uvicorn_entrypoint.os.environ", {"WEB_CONCURRENCY": web_concurrency}), - patch("pylon_service.uvicorn_entrypoint.uvicorn_main") as uvicorn_main, - pytest.raises(RuntimeError, match="WEB_CONCURRENCY"), - ): - main() - - uvicorn_main.assert_not_called() - - -@pytest.mark.parametrize( - "environ", - [ - pytest.param({}, id="web_concurrency_unset"), - pytest.param({"WEB_CONCURRENCY": "1"}, id="web_concurrency_one"), - pytest.param({"WEB_CONCURRENCY": " 1 "}, id="web_concurrency_one_padded"), - ], -) -def test_main_starts_uvicorn_for_single_process(environ): - with ( - patch("pylon_service.uvicorn_entrypoint.sys.argv", ["pylon-service"]), - patch.dict("pylon_service.uvicorn_entrypoint.os.environ", environ, clear=True), - patch("pylon_service.uvicorn_entrypoint.uvicorn_main") as uvicorn_main, - ): - main() - - uvicorn_main.assert_called_once()