diff --git a/README.md b/README.md
index 4d6b7dda9..7932df84e 100644
--- a/README.md
+++ b/README.md
@@ -58,3 +58,20 @@ To run the server, see the [server installation documentation](https://uktrade.g
 ## Development
 
 See our full development guide and coding standards on our [contribution guide](https://uktrade.github.io/matchbox/contributing/).
+
+## Local development with Datadog
+
+When iterating the Datadog configuration, environment variables can be set in several ways:
+
+1. **Datadog configuration**: Create a `.datadog.env` file with your Datadog API key and other agent settings
+2. **Compose override**: Use `docker-compose.override.yml` for local-specific variable overrides
+
+Variables in `.datadog.env` will override any defaults set in the compose file.
+
+Example `.datadog.env`:
+
+```
+DD_API_KEY=your_api_key_here
+```
+
+The Docker Compose file will automatically set `DD_ENV=local-{username}` for local development isolation.
diff --git a/environments/development.env b/environments/development.env
index 16d9760c2..349e0eeef 100644
--- a/environments/development.env
+++ b/environments/development.env
@@ -2,7 +2,7 @@ MB__DEV__API_PORT=8000
 MB__DEV__DATASTORE_CONSOLE_PORT=9003
 MB__DEV__DATASTORE_PORT=9002
 MB__DEV__WAREHOUSE_PORT=7654
-MB__DEV__POSTGRES_BACKEND_PORT=5432  # Change to 9876 here and server.env to avoid conflict with other services
+MB__DEV__POSTGRES_BACKEND_PORT=9876  # Change to 9876 here and server.env to avoid conflict with other services
 
 MB__SERVER__API_KEY=matchbox-api-key
 MB__SERVER__BACKEND_TYPE=postgres
@@ -17,7 +17,7 @@ MB__SERVER__DATASTORE__DEFAULT_REGION=eu-west-2
 MB__SERVER__DATASTORE__CACHE_BUCKET_NAME=cache
 
 MB__SERVER__POSTGRES__HOST=localhost
-MB__SERVER__POSTGRES__PORT=5432  # Change to 9876 here and server.env to avoid conflict with other services
+MB__SERVER__POSTGRES__PORT=9876  # Change to 9876 here and server.env to avoid conflict with other services
 MB__SERVER__POSTGRES__USER=matchbox_user
 MB__SERVER__POSTGRES__PASSWORD=matchbox_password
 MB__SERVER__POSTGRES__DATABASE=matchbox
diff --git a/environments/server.env b/environments/server.env
index bb669db81..3650e852f 100644
--- a/environments/server.env
+++ b/environments/server.env
@@ -2,7 +2,7 @@ MB__DEV__API_PORT=8000
 MB__DEV__DATASTORE_CONSOLE_PORT=9003
 MB__DEV__DATASTORE_PORT=9002
 MB__DEV__WAREHOUSE_PORT=7654
-MB__DEV__POSTGRES_BACKEND_PORT=5432  # Change to 9876 here and development.env to avoid conflict with other services
+MB__DEV__POSTGRES_BACKEND_PORT=9876  # Change to 9876 here and development.env to avoid conflict with other services
 
 MB__SERVER__API_KEY=matchbox-api-key
 MB__SERVER__BACKEND_TYPE=postgres
diff --git a/src/matchbox/client/_handler.py b/src/matchbox/client/_handler.py
index 6562d5de4..a378e9bf1 100644
--- a/src/matchbox/client/_handler.py
+++ b/src/matchbox/client/_handler.py
@@ -158,21 +158,21 @@ def login(user_name: str) -> int:
 
 
 def query(
-    source: SourceResolutionName,
+    sources: list[SourceResolutionName],
     return_leaf_id: bool,
     resolution: ResolutionName | None = None,
     threshold: int | None = None,
     limit: int | None = None,
 ) -> Table:
-    """Query a source in Matchbox."""
-    log_prefix = f"Query {source}"
+    """Query multiple sources in Matchbox."""
+    log_prefix = f"Query {', '.join(sources)}"
     logger.debug(f"Using {resolution}", prefix=log_prefix)
 
     res = CLIENT.get(
         "/query",
         params=url_params(
             {
-                "source": source,
+                "sources": sources,
                 "resolution": resolution,
                 "return_leaf_id": return_leaf_id,
                 "threshold": threshold,
@@ -438,7 +438,8 @@ def sample_for_eval(n: int, resolution: ModelResolutionName, user_id: int) -> Ta
         params=url_params({"n": n, "resolution": resolution, "user_id": user_id}),
     )
 
-    return read_table(BytesIO(res.content))
+    buffer = BytesIO(res.content)
+    return read_table(buffer)
 
 
 def compare_models(resolutions: list[ModelResolutionName]) -> ModelComparison:
diff --git a/src/matchbox/client/extract.py b/src/matchbox/client/extract.py
index e666209a8..2a847082b 100644
--- a/src/matchbox/client/extract.py
+++ b/src/matchbox/client/extract.py
@@ -34,25 +34,44 @@ def key_field_map(
     source_mb_ids: list[ArrowTable] = []
     source_to_key_field: dict[str, str] = {}
 
+    # Store source names and key field mappings
+    source_names = [s.name for s in sources]
     for s in sources:
-        # Get Matchbox IDs from backend
+        source_to_key_field[s.name] = s.key_field.name
+
+    if len(sources) == 1:
+        # Single source - make individual call
         source_mb_ids.append(
             _handler.query(
-                source=s.name,
+                sources=[sources[0].name],
                 resolution=resolution,
                 return_leaf_id=False,
             )
         )
+    else:
+        # Multiple sources - make single multi-source call
+        combined_result = _handler.query(
+            sources=source_names,
+            resolution=resolution,
+            return_leaf_id=False,
+        )
 
-        source_to_key_field[s.name] = s.key_field.name
+        # Split the combined result by source
+        import polars as pl
+
+        combined_df = pl.from_arrow(combined_result)
+        for source_name in source_names:
+            source_data = combined_df.filter(pl.col("source") == source_name).to_arrow()
+            source_mb_ids.append(source_data)
 
     # Join Matchbox IDs to form mapping table
-    mapping = source_mb_ids[0]
+    mapping = source_mb_ids[0].select(["id", "key"])
     mapping = mapping.rename_columns({"key": sources[0].qualified_key})
     if len(sources) > 1:
         for s, mb_ids in zip(sources[1:], source_mb_ids[1:], strict=True):
+            mb_ids_selected = mb_ids.select(["id", "key"])
             mapping = mapping.join(
-                right_table=mb_ids, keys="id", join_type="full outer"
+                right_table=mb_ids_selected, keys="id", join_type="full outer"
             )
             mapping = mapping.rename_columns({"key": s.qualified_key})
 
diff --git a/src/matchbox/client/helpers/selector.py b/src/matchbox/client/helpers/selector.py
index 6ec755173..08101d3fc 100644
--- a/src/matchbox/client/helpers/selector.py
+++ b/src/matchbox/client/helpers/selector.py
@@ -198,16 +198,27 @@ def _process_selectors(
 
     For batched queries, yield from it.
     """
-    selector_results: list[PolarsDataFrame] = []
-    for selector in selectors:
-        mb_ids = pl.from_arrow(
-            _handler.query(
-                source=selector.source.name,
-                resolution=resolution,
-                threshold=threshold,
-                return_leaf_id=return_leaf_id,
-            )
+    # Group selectors by resolution to make efficient multi-source queries
+    if not selectors:
+        return
+
+    # Make single multi-source query with all selectors
+    source_names = [selector.source.name for selector in selectors]
+
+    # Make single multi-source API call
+    mb_ids = pl.from_arrow(
+        _handler.query(
+            sources=source_names,
+            resolution=resolution,
+            threshold=threshold,
+            return_leaf_id=return_leaf_id,
         )
+    )
+
+    # Process each selector with the multi-source result
+    for selector in selectors:
+        # Filter the multi-source results to this selector's source
+        source_filtered_ids = mb_ids.filter(pl.col("source") == selector.source.name)
 
         raw_batches = selector.source.query(
             qualify_names=True,
@@ -218,15 +229,17 @@ def _process_selectors(
         processed_batches = [
             _process_query_result(
                 data=b,
+                mb_ids=source_filtered_ids,
                 selector=selector,
-                mb_ids=mb_ids,
                 return_leaf_id=return_leaf_id,
             )
             for b in raw_batches
         ]
-        selector_results.append(pl.concat(processed_batches, how="vertical"))
 
-    return selector_results
+        # Concatenate all batches for this selector and yield
+        if processed_batches:
+            selector_result = pl.concat(processed_batches, how="vertical")
+            yield selector_result
 
 
 def query(
diff --git a/src/matchbox/common/arrow.py b/src/matchbox/common/arrow.py
index 2c151f645..0302bb9d0 100644
--- a/src/matchbox/common/arrow.py
+++ b/src/matchbox/common/arrow.py
@@ -5,18 +5,21 @@
 from typing import Final
 
 import pyarrow as pa
-import pyarrow.parquet as pq
 from pyarrow import Schema
 
 from matchbox.common.exceptions import MatchboxArrowSchemaMismatch
 
 SCHEMA_QUERY: Final[pa.Schema] = pa.schema(
-    [("id", pa.int64()), ("key", pa.large_string())]
+    [
+        ("id", pa.int64()),
+        ("key", pa.large_string()),
+        ("source", pa.dictionary(pa.int32(), pa.string())),
+    ]
 )
-"""Data transfer schema for root cluster IDs keyed to primary keys."""
+"""Data transfer schema for root cluster IDs keyed to primary keys with source ID."""
 
 SCHEMA_QUERY_WITH_LEAVES = SCHEMA_QUERY.append(pa.field("leaf_id", pa.int64()))
-"""Data transfer schema for root cluster IDs keyed to primary keys and leaf IDs."""
+"""Data transfer schema for cluster IDs with primary keys, source ID, and leaf IDs."""
 
 
 SCHEMA_INDEX: Final[pa.Schema] = pa.schema(
@@ -70,9 +73,15 @@ class JudgementsZipFilenames(StrEnum):
 
 
 def table_to_buffer(table: pa.Table) -> BytesIO:
-    """Converts an Arrow table to a BytesIO buffer."""
+    """Converts an Arrow table to a BytesIO buffer using Arrow IPC format.
+
+    Uses Arrow IPC format instead of parquet to preserve exact schema fidelity,
+    including uint32 dictionary indices and large_string values.
+    """
     sink = BytesIO()
-    pq.write_table(table, sink)
+    writer = pa.ipc.new_file(sink, table.schema)
+    writer.write_table(table)
+    writer.close()
     sink.seek(0)
     return sink
 
diff --git a/src/matchbox/common/eval.py b/src/matchbox/common/eval.py
index ce195b0ef..fd33c93c7 100644
--- a/src/matchbox/common/eval.py
+++ b/src/matchbox/common/eval.py
@@ -177,7 +177,11 @@ def process_judgements(
         # if missing expansion, assume we're dealing with singleton leaves
         .with_columns(
             pl.when(pl.col("endorsed_leaves").is_null())
-            .then(pl.col("endorsed").map_elements(lambda x: [x]))
+            .then(
+                pl.col("endorsed").map_elements(
+                    lambda x: [x], return_dtype=pl.List(pl.UInt64)
+                )
+            )
             .otherwise(pl.col("endorsed_leaves"))
             .alias("endorsed_leaves")
         )
diff --git a/src/matchbox/common/factories/entities.py b/src/matchbox/common/factories/entities.py
index 9b759946f..4eb64de9f 100644
--- a/src/matchbox/common/factories/entities.py
+++ b/src/matchbox/common/factories/entities.py
@@ -291,12 +291,12 @@ def get_values(
                 raise ValueError(f"SourceConfig not found: {source_name}")
 
             # Get rows for this entity in this source
-            df = source.data.to_pandas()
-            entity_rows = df[df["key"].isin(keys)]
+            df = pl.from_arrow(source.data)
+            entity_rows = df.filter(pl.col("key").is_in(keys))
 
             # Get unique values for each feature in this source
             values[source_name] = {
-                feature.name: sorted(entity_rows[feature.name].unique())
+                feature.name: sorted(entity_rows[feature.name].unique().to_list())
                 for feature in source.features
             }
 
@@ -473,33 +473,43 @@ def query_to_cluster_entities(
     Returns:
         A set of ClusterEntity objects
     """
-    # Convert polars to pandas for compatibility with existing logic
-    if isinstance(query, pl.DataFrame):
-        query = query.to_pandas()
-    elif isinstance(query, pa.Table):
-        query = query.to_pandas()
+    # Convert to polars for efficient processing (avoids pandas uint32 issues)
+    if isinstance(query, pa.Table):
+        query_df = pl.from_arrow(query)
+    elif isinstance(query, pd.DataFrame):
+        query_df = pl.from_pandas(query)
+    else:
+        query_df = query
 
     must_have_fields = set(["id"] + list(keys.values()))
-    if not must_have_fields.issubset(query.columns):
+    if not must_have_fields.issubset(query_df.columns):
         raise ValueError(
-            f"Fields {must_have_fields.difference(query.columns)} must be included "
+            f"Fields {must_have_fields.difference(query_df.columns)} must be included "
             "in the query and are missing."
         )
 
-    def _create_cluster_entity(group: pd.DataFrame) -> ClusterEntity:
-        entity_refs = {
-            source: frozenset(group[key_field].dropna().values)
-            for source, key_field in keys.items()
-            if not group[key_field].dropna().empty
-        }
+    def _create_cluster_entity(group_df: pl.DataFrame) -> ClusterEntity:
+        # Get the cluster ID (should be the same for all rows in the group)
+        cluster_id = group_df["id"][0]
+
+        entity_refs = {}
+        for source, key_field in keys.items():
+            # Get non-null values for this key field
+            values = group_df.filter(pl.col(key_field).is_not_null())[key_field]
+            if len(values) > 0:
+                entity_refs[source] = frozenset(values.to_list())
 
         return ClusterEntity(
-            id=group.name,
+            id=cluster_id,
             keys=EntityReference(entity_refs),
         )
 
-    result = query.groupby("id").apply(_create_cluster_entity, include_groups=False)
-    return set(result.tolist())
+    # Group by cluster ID and create ClusterEntity for each group
+    result = []
+    for _cluster_id, group_df in query_df.group_by("id"):
+        result.append(_create_cluster_entity(group_df))
+
+    return set(result)
 
 
 @cache
diff --git a/src/matchbox/server/api/main.py b/src/matchbox/server/api/main.py
index e97981602..72abf3ff3 100644
--- a/src/matchbox/server/api/main.py
+++ b/src/matchbox/server/api/main.py
@@ -235,16 +235,16 @@ async def get_upload_status(
 )
 def query(
     backend: BackendDependency,
-    source: SourceResolutionName,
+    sources: Annotated[list[SourceResolutionName], Query()],
     return_leaf_id: bool,
     resolution: ResolutionName | None = None,
     threshold: int | None = None,
     limit: int | None = None,
 ) -> ParquetResponse:
-    """Query Matchbox for matches based on a source resolution name."""
+    """Query Matchbox for matches based on multiple source resolution names."""
     try:
         res = backend.query(
-            source=source,
+            sources=sources,
             resolution=resolution,
             threshold=threshold,
             return_leaf_id=return_leaf_id,
diff --git a/src/matchbox/server/base.py b/src/matchbox/server/base.py
index 94d0997bd..492cae5f7 100644
--- a/src/matchbox/server/base.py
+++ b/src/matchbox/server/base.py
@@ -228,7 +228,7 @@ class MatchboxDBAdapter(ABC):
     @abstractmethod
     def query(
         self,
-        source: SourceResolutionName,
+        sources: list[SourceResolutionName],
         resolution: ResolutionName | None = None,
         threshold: int | None = None,
         return_leaf_id: bool = False,
@@ -237,9 +237,9 @@ def query(
         """Queries the database from an optional point of truth.
 
         Args:
-            source: the `SourceResolutionName` string identifying the source to query
+            sources: list of `SourceResolutionName` strings identifying sources to query
             resolution (optional): the resolution to use for filtering results
-                If not specified, will use the source resolution for the queried source
+                If not specified, will use the source resolution for the first source
             threshold (optional): the threshold to use for creating clusters
                 If None, uses the models' default threshold
                 If an integer, uses that threshold for the specified model, and the
diff --git a/src/matchbox/server/postgresql/adapter.py b/src/matchbox/server/postgresql/adapter.py
index 2f07dc527..6ea8c133e 100644
--- a/src/matchbox/server/postgresql/adapter.py
+++ b/src/matchbox/server/postgresql/adapter.py
@@ -193,14 +193,14 @@ def __init__(self, settings: MatchboxPostgresSettings):
 
     def query(  # noqa: D102
         self,
-        source: SourceResolutionName,
+        sources: list[SourceResolutionName],
         resolution: ResolutionName | None = None,
         threshold: int | None = None,
         return_leaf_id: bool = False,
         limit: int | None = None,
     ) -> ArrowTable:
         return query(
-            source=source,
+            sources=sources,
             resolution=resolution,
             threshold=threshold,
             return_leaf_id=return_leaf_id,
diff --git a/src/matchbox/server/postgresql/utils/query.py b/src/matchbox/server/postgresql/utils/query.py
index 6469e355c..3f7d42ae6 100644
--- a/src/matchbox/server/postgresql/utils/query.py
+++ b/src/matchbox/server/postgresql/utils/query.py
@@ -2,6 +2,7 @@
 
 from typing import Literal, TypeVar
 
+import polars as pl
 import pyarrow as pa
 from sqlalchemy import (
     CTE,
@@ -467,17 +468,17 @@ def _build_match_query(
 
 
 def query(
-    source: SourceResolutionName,
+    sources: list[SourceResolutionName],
     resolution: ResolutionName | None = None,
     threshold: int | None = None,
     return_leaf_id: bool = False,
     limit: int = None,
 ) -> pa.Table:
-    """Queries Matchbox to retrieve linked data for a source.
+    """Queries Matchbox to retrieve linked data for multiple sources.
 
-    Retrieves all linked data for a given source, resolving through hierarchy if needed.
+    Retrieves all linked data for given sources, resolving through hierarchy if needed.
 
-    * Simple case: If querying the same resolution as the source, just select cluster
+    * Simple case: If querying the same resolution as the sources, just select cluster
         IDs and keys directly from ClusterSourceKey
     * Hierarchy case: Uses the unified query builder to traverse up the resolution
         hierarchy, applying COALESCE priority logic to determine which parent cluster
@@ -487,11 +488,19 @@ def query(
 
     Returns all records with their final resolved cluster IDs.
     """
+    if not sources:
+        raise ValueError("At least one source must be provided")
+
     with MBDB.get_session() as session:
-        source_config: SourceConfigs = get_source_config(source, session)
-        source_resolution: Resolutions = session.get(
-            Resolutions, source_config.resolution_id
-        )
+        # Get all source configs and validate they exist
+        source_configs: list[SourceConfigs] = []
+        source_resolutions: list[Resolutions] = []
+
+        for source in sources:
+            source_config = get_source_config(source, session)
+            source_configs.append(source_config)
+            source_resolution = session.get(Resolutions, source_config.resolution_id)
+            source_resolutions.append(source_resolution)
 
         if resolution:
             truth_resolution: Resolutions = (
@@ -502,14 +511,16 @@ def query(
             if truth_resolution is None:
                 raise MatchboxResolutionNotFoundError(name=resolution)
         else:
-            truth_resolution: Resolutions = source_resolution
+            # Use the first source's resolution as the truth resolution
+            truth_resolution: Resolutions = source_resolutions[0]
 
+        # Add source_config_id to the query level selection
         id_query: Select = build_unified_query(
             resolution=truth_resolution,
-            sources=[source_config],
+            sources=source_configs,
             threshold=threshold,
             level="key",
-        )
+        ).add_columns(ClusterSourceKey.source_config_id)
 
         if limit:
             id_query = id_query.limit(limit)
@@ -518,14 +529,36 @@ def query(
             stmt: str = compile_sql(id_query)
             logger.debug(f"Query SQL: \n {stmt}")
             id_results = sql_to_df(
-                stmt=stmt, connection=conn.dbapi_connection, return_type="arrow"
-            ).rename_columns({"root_id": "id"})
+                stmt=stmt, connection=conn.dbapi_connection, return_type="polars"
+            ).rename({"root_id": "id"})
+
+        # Add source identification column using Polars join (faster than map_elements)
+        # Create a mapping DataFrame for efficient joining
+        source_mapping_df = pl.DataFrame(
+            {
+                "source_config_id": [
+                    config.source_config_id for config in source_configs
+                ],
+                "source": [sources[i] for i in range(len(source_configs))],
+            }
+        )
 
-        selection = ["id", "key"]
+        # Join to add source names and cast to categorical for efficient encoding
+        id_results = id_results.join(
+            source_mapping_df, on="source_config_id", how="left"
+        ).with_columns(pl.col("source").cast(pl.Categorical))
+
+        # Select final columns
+        selection = ["id", "key", "source"]
         if return_leaf_id:
             selection.append("leaf_id")
 
-        return id_results.select(selection)
+        final_df = id_results.select(selection)
+
+        # Convert to Arrow - Polars creates dictionary<indices=int32, values=string>
+        # which is compatible with parquet format for efficient file sizes.
+        # Note: parquet would downgrade uint32/large_string anyway for compatibility.
+        return final_df.to_arrow()
 
 
 def get_parent_clusters_and_leaves(
diff --git a/test/client/helpers/test_query_helper.py b/test/client/helpers/test_query_helper.py
index f005e7ced..d4e4c0d7f 100644
--- a/test/client/helpers/test_query_helper.py
+++ b/test/client/helpers/test_query_helper.py
@@ -1,4 +1,7 @@
+from io import BytesIO
+
 import pyarrow as pa
+import pyarrow.parquet as pq
 import pytest
 from httpx import Response
 from numpy import ndarray
@@ -7,7 +10,7 @@
 
 from matchbox import query
 from matchbox.client.helpers import select
-from matchbox.common.arrow import SCHEMA_QUERY, table_to_buffer
+from matchbox.common.arrow import SCHEMA_QUERY
 from matchbox.common.dtos import BackendResourceType, NotFoundError
 from matchbox.common.exceptions import MatchboxResolutionNotFoundError
 from matchbox.common.factories.sources import source_factory, source_from_tuple
@@ -32,20 +35,29 @@ def test_query_no_resolution_ok_various_params(
         return_value=Response(200, json=testkit.source_config.model_dump(mode="json"))
     )
 
+    # Create mock table that matches parquet schema (int32/string dictionary)
+    id_array = pa.array([1, 2], type=pa.int64())
+    key_array = pa.array(["0", "1"], type=pa.large_string())
+
+    # Create dictionary with int32 indices and string values (parquet compatible)
+    source_indices = pa.array([0, 0], type=pa.int32())
+    source_dict = pa.array(["foo"], type=pa.string())
+    source_array = pa.DictionaryArray.from_arrays(source_indices, source_dict)
+
+    mock_table = pa.table([id_array, key_array, source_array], schema=SCHEMA_QUERY)
+
+    # Create parquet buffer instead of Arrow IPC
+    parquet_buffer = BytesIO()
+    pq.write_table(mock_table, parquet_buffer)
+    parquet_buffer.seek(0)
+
     query_route = matchbox_api.get("/query").mock(
         return_value=Response(
             200,
-            content=table_to_buffer(
-                pa.Table.from_pylist(
-                    [
-                        {"key": "0", "id": 1},
-                        {"key": "1", "id": 2},
-                    ],
-                    schema=SCHEMA_QUERY,
-                )
-            ).read(),
+            content=parquet_buffer.read(),
         )
-    )
+    )  # The query route mock above will handle both calls automatically
+    # since respx allows multiple calls to the same route
 
     selectors = select({"foo": ["a", "b"]}, client=sqlite_warehouse)
 
@@ -54,8 +66,10 @@ def test_query_no_resolution_ok_various_params(
     assert len(results) == 2
     assert {"foo_a", "foo_b", "id"} == set(results.columns)
 
-    assert dict(query_route.calls.last.request.url.params) == {
-        "source": testkit.source_config.name,
+    # Check first call (without threshold)
+    first_call_params = dict(query_route.calls[0].request.url.params)
+    assert first_call_params == {
+        "sources": testkit.source_config.name,
         "return_leaf_id": "False",
     }
 
@@ -66,8 +80,10 @@ def test_query_no_resolution_ok_various_params(
     assert len(results) == 2
     assert {"foo_a", "foo_b", "id"} == set(results.columns)
 
-    assert dict(query_route.calls.last.request.url.params) == {
-        "source": testkit.source_config.name,
+    # Check second call (with threshold)
+    second_call_params = dict(query_route.calls[1].request.url.params)
+    assert second_call_params == {
+        "sources": testkit.source_config.name,
         "threshold": "50",
         "return_leaf_id": "False",
     }
@@ -101,34 +117,31 @@ def test_query_multiple_sources(matchbox_api: MockRouter, sqlite_warehouse: Engi
         return_value=Response(200, json=testkit2.source_config.model_dump(mode="json"))
     )
 
+    # Create combined mock table for both sources using parquet-compatible schema
+    id_array = pa.array([1, 2, 1, 2], type=pa.int64())
+    key_array = pa.array(["0", "1", "2", "3"], type=pa.large_string())
+
+    # Create categorical source array with parquet-compatible types
+    source_indices = pa.array([0, 0, 1, 1], type=pa.int32())
+    source_dict = pa.array(
+        [testkit1.source_config.name, testkit2.source_config.name], type=pa.string()
+    )
+    source_array = pa.DictionaryArray.from_arrays(source_indices, source_dict)
+
+    combined_mock_table = pa.table(
+        [id_array, key_array, source_array], schema=SCHEMA_QUERY
+    )
+
+    # Mock for multi-source query - use general matching
+    combined_parquet_buffer = BytesIO()
+    pq.write_table(combined_mock_table, combined_parquet_buffer)
+    combined_parquet_buffer.seek(0)
+
     query_route = matchbox_api.get("/query").mock(
-        side_effect=[
-            Response(
-                200,
-                content=table_to_buffer(
-                    pa.Table.from_pylist(
-                        [
-                            {"key": "0", "id": 1},
-                            {"key": "1", "id": 2},
-                        ],
-                        schema=SCHEMA_QUERY,
-                    )
-                ).read(),
-            ),
-            Response(
-                200,
-                content=table_to_buffer(
-                    pa.Table.from_pylist(
-                        [
-                            {"key": "2", "id": 1},
-                            {"key": "3", "id": 2},
-                        ],
-                        schema=SCHEMA_QUERY,
-                    )
-                ).read(),
-            ),
-        ]
-        * 2  # 2 calls to `query()` in this test, each querying server twice
+        return_value=Response(
+            200,
+            content=combined_parquet_buffer.read(),
+        )
     )
 
     sels = select("foo", {"foo2": ["c"]}, client=sqlite_warehouse)
@@ -146,18 +159,21 @@ def test_query_multiple_sources(matchbox_api: MockRouter, sqlite_warehouse: Engi
         "id",
     } == set(results.columns)
 
-    assert dict(query_route.calls[-2].request.url.params) == {
-        "source": testkit1.source_config.name,
-        "resolution": DEFAULT_RESOLUTION,
-        "return_leaf_id": "False",
-    }
-    assert dict(query_route.calls[-1].request.url.params) == {
-        "source": testkit2.source_config.name,
-        "resolution": DEFAULT_RESOLUTION,
-        "return_leaf_id": "False",
+    # Check that the multi-source query was made correctly
+    from urllib.parse import parse_qs, urlparse
+
+    last_request_url = str(query_route.calls.last.request.url)
+    parsed_url = urlparse(last_request_url)
+    url_params = parse_qs(parsed_url.query)
+
+    assert url_params == {
+        "sources": [testkit1.source_config.name, testkit2.source_config.name],
+        "resolution": [DEFAULT_RESOLUTION],
+        "return_leaf_id": ["False"],
     }
 
     # It also works with the selectors specified separately
+    # But with the optimization, this also makes a single multi-source call
     query([sels[0]], [sels[1]], return_leaf_id=False)
 
 
@@ -195,41 +211,34 @@ def test_query_combine_type(
         return_value=Response(200, json=testkit2.source_config.model_dump(mode="json"))
     )
 
+    # Create combined mock table for multi-source query using parquet-compatible schema
+    id_array = pa.array([1, 1, 2, 2, 2, 3], type=pa.int64())
+    key_array = pa.array(["0", "1", "2", "3", "3", "4"], type=pa.large_string())
+
+    # Create categorical source array with parquet-compatible types
+    source_indices = pa.array([0, 0, 0, 1, 1, 1], type=pa.int32())
+    source_dict = pa.array(
+        [testkit1.source_config.name, testkit2.source_config.name], type=pa.string()
+    )
+    source_array = pa.DictionaryArray.from_arrays(source_indices, source_dict)
+
+    combined_mock_table = pa.table(
+        [id_array, key_array, source_array], schema=SCHEMA_QUERY
+    )
+
+    # Create parquet buffer for mock response
+    combined_parquet_buffer2 = BytesIO()
+    pq.write_table(combined_mock_table, combined_parquet_buffer2)
+    combined_parquet_buffer2.seek(0)
+
     matchbox_api.get("/query").mock(
-        side_effect=[
-            Response(
-                200,
-                content=table_to_buffer(
-                    pa.Table.from_pylist(
-                        [
-                            {"key": "0", "id": 1},
-                            {"key": "1", "id": 1},
-                            {"key": "2", "id": 2},
-                        ],
-                        schema=SCHEMA_QUERY,
-                    )
-                ).read(),
-            ),
-            Response(
-                200,
-                content=table_to_buffer(
-                    pa.Table.from_pylist(
-                        [
-                            # Creating a duplicate value for the same Matchbox ID
-                            {"key": "3", "id": 2},
-                            {"key": "3", "id": 2},
-                            {"key": "4", "id": 3},
-                        ],
-                        schema=SCHEMA_QUERY,
-                    )
-                ).read(),
-            ),
-        ]  # two sources to query
+        return_value=Response(
+            200,
+            content=combined_parquet_buffer2.read(),
+        )
     )
 
     sels = select("foo", "bar", client=sqlite_warehouse)
-
-    # Validate results
     results = query(sels, combine_type=combine_type, return_leaf_id=False)
 
     if combine_type == "set_agg":
diff --git a/test/client/test_eval.py b/test/client/test_eval.py
index 6d33b2f20..60e50a444 100644
--- a/test/client/test_eval.py
+++ b/test/client/test_eval.py
@@ -163,7 +163,13 @@ def test_get_samples(
             content=table_to_buffer(just_baz_samples).read(),
         )
     )
-    no_accessible_samples = get_samples(n=10, resolution="resolution", user_id=user_id)
+    # Suppress expected warning about incompatible client for 'baz' source
+    with pytest.warns(
+        UserWarning, match="Skipping baz, incompatible with given client"
+    ):
+        no_accessible_samples = get_samples(
+            n=10, resolution="resolution", user_id=user_id
+        )
     assert no_accessible_samples == {}
 
     # Using default client as fallback
diff --git a/test/client/test_extract.py b/test/client/test_extract.py
index d2caae7ae..836d8ab37 100644
--- a/test/client/test_extract.py
+++ b/test/client/test_extract.py
@@ -61,35 +61,48 @@ def test_key_field_map(
         )
     )
 
-    matchbox_api.get("/query", params={"source": "foo"}).mock(
+    # Create mock table for foo source (for single-source queries)
+    indices_foo = pa.array([0, 0, 0], type=pa.uint32())
+    dictionary_foo = pa.array(["foo"], type=pa.large_string())
+    source_dict_foo = pa.DictionaryArray.from_arrays(indices_foo, dictionary_foo)
+
+    foo_table = pa.Table.from_arrays(
+        [
+            pa.array([1, 2, 3], type=pa.int64()),
+            pa.array(["1", "2", "3"], type=pa.large_string()),
+            source_dict_foo,
+        ],
+        schema=SCHEMA_QUERY,
+    )
+
+    matchbox_api.get("/query", params={"sources": ["foo"]}).mock(
         return_value=Response(
             200,
-            content=table_to_buffer(
-                pa.Table.from_pylist(
-                    [
-                        {"id": 1, "key": "1"},
-                        {"id": 2, "key": "2"},
-                        {"id": 3, "key": "3"},
-                    ],
-                    schema=SCHEMA_QUERY,
-                )
-            ).read(),
+            content=table_to_buffer(foo_table).read(),
         )
     )
 
-    matchbox_api.get("/query", params={"source": "bar"}).mock(
+    # Create combined mock table for multi-source queries
+    combined_indices = pa.array([0, 0, 0, 1, 1, 1], type=pa.uint32())
+    combined_dictionary = pa.array(["foo", "bar"], type=pa.large_string())
+    combined_source_dict = pa.DictionaryArray.from_arrays(
+        combined_indices, combined_dictionary
+    )
+
+    combined_table = pa.Table.from_arrays(
+        [
+            pa.array([1, 2, 3, 1, 3, 3], type=pa.int64()),
+            pa.array(["1", "2", "3", "a", "b", "c"], type=pa.large_string()),
+            combined_source_dict,
+        ],
+        schema=SCHEMA_QUERY,
+    )
+
+    # Mock for multi-source query (no filter or both sources)
+    matchbox_api.get("/query", params={"sources": ["foo", "bar"]}).mock(
         return_value=Response(
             200,
-            content=table_to_buffer(
-                pa.Table.from_pylist(
-                    [
-                        {"id": 1, "key": "a"},
-                        {"id": 3, "key": "b"},
-                        {"id": 3, "key": "c"},
-                    ],
-                    schema=SCHEMA_QUERY,
-                )
-            ).read(),
+            content=table_to_buffer(combined_table).read(),
         )
     )
 
@@ -112,7 +125,7 @@ def test_key_field_map(
     )
 
     # With source filter
-    foo_mapping = key_field_map(resolution="companies", source_filter="foo")
+    foo_mapping = key_field_map(resolution="companies", source_filter=["foo"])
 
     assert_frame_equal(
         pl.from_arrow(foo_mapping),
@@ -124,7 +137,7 @@ def test_key_field_map(
     # With both filters
     foo_mapping = key_field_map(
         resolution="companies",
-        source_filter="foo",
+        source_filter=["foo"],
         location_names="sqlite",
     )
 
diff --git a/test/fixtures/db.py b/test/fixtures/db.py
index ff8a1029d..034b3cc5b 100644
--- a/test/fixtures/db.py
+++ b/test/fixtures/db.py
@@ -146,7 +146,7 @@ def create_dedupe_scenario(
         name = f"naive_test.{source.name}"
 
         # Query the raw data
-        source_query = backend.query(source=source.name)
+        source_query = backend.query(sources=[source.name])
 
         # Build model testkit using query data
         model_testkit = query_to_model_factory(
@@ -189,7 +189,7 @@ def create_probabilistic_dedupe_scenario(
         name = f"probabilistic_test.{source.name}"
 
         # Query the raw data
-        source_query = backend.query(source=source.name)
+        source_query = backend.query(sources=[source.name])
 
         # Build model testkit using query data
         model_testkit = query_to_model_factory(
@@ -234,9 +234,9 @@ def create_link_scenario(
     cdms_model = dag.models["naive_test.cdms"]
 
     # Query data for each resolution
-    crn_query = backend.query(source="crn", resolution=crn_model.name)
-    duns_query = backend.query(source="duns", resolution=duns_model.name)
-    cdms_query = backend.query(source="cdms", resolution=cdms_model.name)
+    crn_query = backend.query(sources=["crn"], resolution=crn_model.name)
+    duns_query = backend.query(sources=["duns"], resolution=duns_model.name)
+    cdms_query = backend.query(sources=["cdms"], resolution=cdms_model.name)
 
     # Create CRN-DUNS link
     crn_duns_name = "deterministic_naive_test.crn_naive_test.duns"
@@ -282,17 +282,17 @@ def create_link_scenario(
     # Create final join
     # Query the previous link's results
     crn_cdms_query_crn_only = backend.query(
-        source="crn", resolution=crn_cdms_name
-    ).rename_columns(["id", "keys_crn"])
+        sources=["crn"], resolution=crn_cdms_name
+    ).rename_columns(["id", "keys_crn", "source"])
     crn_cdms_query_cdms_only = backend.query(
-        source="cdms", resolution=crn_cdms_name
-    ).rename_columns(["id", "keys_cdms"])
+        sources=["cdms"], resolution=crn_cdms_name
+    ).rename_columns(["id", "keys_cdms", "source"])
     crn_cdms_query = pa.concat_tables(
         [crn_cdms_query_crn_only, crn_cdms_query_cdms_only],
         promote_options="default",
     ).combine_chunks()
 
-    duns_query_linked = backend.query(source="duns", resolution=duns_model.name)
+    duns_query_linked = backend.query(sources=["duns"], resolution=duns_model.name)
 
     final_join_name = "final_join"
     final_join_model = query_to_model_factory(
@@ -361,7 +361,7 @@ def create_alt_dedupe_scenario(
         model_name2 = f"dedupe2.{source.name}"
 
         # Query the raw data
-        source_query = backend.query(source=source.name)
+        source_query = backend.query(sources=[source.name])
 
         # Build model testkit using query data
         model_testkit1 = query_to_model_factory(
@@ -457,7 +457,7 @@ def create_convergent_scenario(
         name = f"naive_test.{source.name}"
 
         # Query the raw data
-        source_query = backend.query(source=source.name)
+        source_query = backend.query(sources=[source.name])
 
         # Build model testkit using query data
         model_testkit = query_to_model_factory(
diff --git a/test/server/api/routes/test_routes_main.py b/test/server/api/routes/test_routes_main.py
index 7d1fe78bf..946071e7e 100644
--- a/test/server/api/routes/test_routes_main.py
+++ b/test/server/api/routes/test_routes_main.py
@@ -309,23 +309,31 @@ def test_process_upload_deletes_file_on_failure(s3: S3Client):
 def test_query(test_client: TestClient):
     # Mock backend
     mock_backend = Mock()
-    mock_backend.query = Mock(
-        return_value=pa.Table.from_pylist(
-            [
-                {"keys": "a", "id": 1},
-                {"keys": "b", "id": 2},
-            ],
-            schema=SCHEMA_QUERY,
-        )
+
+    # Create test data with proper schema including categorical source
+    # Create dictionary array directly with large_string values
+    indices = pa.array([0, 0], type=pa.uint32())
+    dictionary = pa.array(["foo"], type=pa.large_string())
+    source_dict = pa.DictionaryArray.from_arrays(indices, dictionary)
+
+    mock_table = pa.Table.from_arrays(
+        [
+            pa.array([1, 2], type=pa.int64()),
+            pa.array(["a", "b"], type=pa.large_string()),
+            source_dict,
+        ],
+        schema=SCHEMA_QUERY,
     )
 
+    mock_backend.query = Mock(return_value=mock_table)
+
     # Override app dependencies with mocks
     app.dependency_overrides[backend] = lambda: mock_backend
 
-    # Hit endpoint
+    # Hit endpoint with sources parameter (now a list)
     response = test_client.get(
         "/query",
-        params={"source": "foo", "return_leaf_id": False},
+        params={"sources": ["foo"], "return_leaf_id": False},
     )
 
     # Process response
@@ -334,7 +342,14 @@ def test_query(test_client: TestClient):
 
     # Check response
     assert response.status_code == 200
-    assert table.schema.equals(SCHEMA_QUERY)
+    # Check that we have the right columns
+    assert len(table.schema) == 3
+    assert table.schema.names == ["id", "key", "source"]
+    assert table.schema.field("id").type == pa.int64()
+    assert table.schema.field("key").type == pa.large_string()
+    assert table.schema.field("source").type.equals(
+        pa.dictionary(pa.uint32(), pa.large_string())
+    )
 
 
 def test_query_404_resolution(test_client: TestClient):
@@ -347,7 +362,7 @@ def test_query_404_resolution(test_client: TestClient):
     # Hit endpoint
     response = test_client.get(
         "/query",
-        params={"source": "foo", "resolution": "bar", "return_leaf_id": True},
+        params={"sources": ["foo"], "resolution": "bar", "return_leaf_id": True},
     )
 
     # Check response
@@ -364,7 +379,7 @@ def test_query_404_source(test_client: TestClient):
     # Hit endpoint
     response = test_client.get(
         "/query",
-        params={"source": "foo", "return_leaf_id": True},
+        params={"sources": ["foo"], "return_leaf_id": True},
     )
 
     # Check response
diff --git a/test/server/postgresql/test_pg_sql.py b/test/server/postgresql/test_pg_sql.py
index 5f6b7b77a..ae28dae26 100644
--- a/test/server/postgresql/test_pg_sql.py
+++ b/test/server/postgresql/test_pg_sql.py
@@ -1180,7 +1180,9 @@ class TestQueryFunction:
 
     def test_query_source_only(self, populated_postgres_db: MatchboxPostgres):
         """Should query source data without resolution."""
-        result = query("source_a", resolution=None, threshold=None, limit=None)
+        result = query(
+            sources=["source_a"], resolution=None, threshold=None, limit=None
+        )
 
         # Should return all keys from source_a with their cluster assignments
         assert result.shape[0] == 6
@@ -1206,7 +1208,9 @@ def test_query_source_only(self, populated_postgres_db: MatchboxPostgres):
 
     def test_query_source_b_only(self, populated_postgres_db: MatchboxPostgres):
         """Should query source_b which has one key per cluster."""
-        result = query("source_b", resolution=None, threshold=None, limit=None)
+        result = query(
+            sources=["source_b"], resolution=None, threshold=None, limit=None
+        )
 
         # Should return all keys from source_b
         assert result.shape[0] == 5
@@ -1225,7 +1229,9 @@ def test_query_source_b_only(self, populated_postgres_db: MatchboxPostgres):
 
     def test_query_through_deduper(self, populated_postgres_db: MatchboxPostgres):
         """Should query source through its deduper resolution."""
-        result = query("source_a", resolution="dedupe_a", threshold=None, limit=None)
+        result = query(
+            sources=["source_a"], resolution="dedupe_a", threshold=None, limit=None
+        )
 
         # Should return all 6 keys, but some mapped to dedupe clusters
         assert result.shape[0] == 6
@@ -1246,7 +1252,9 @@ def test_query_through_deduper_with_threshold(
     ):
         """Should query source through deduper with threshold override."""
         # Test with threshold=90 (higher than dedupe_a's clusters)
-        result = query("source_a", resolution="dedupe_a", threshold=90, limit=None)
+        result = query(
+            sources=["source_a"], resolution="dedupe_a", threshold=90, limit=None
+        )
 
         # Should return all 6 keys, but no dedupe clusters qualify
         assert result.shape[0] == 6
@@ -1261,7 +1269,9 @@ def test_query_through_deduper_with_threshold(
 
     def test_query_through_linker(self, populated_postgres_db: MatchboxPostgres):
         """Should query source through complex linker resolution."""
-        result = query("source_a", resolution="linker_ab", threshold=None, limit=None)
+        result = query(
+            sources=["source_a"], resolution="linker_ab", threshold=None, limit=None
+        )
 
         # Should return all 6 keys with linker cluster assignments
         assert result.shape[0] == 6
@@ -1294,8 +1304,12 @@ def test_query_both_sources_through_linker(
         self, populated_postgres_db: MatchboxPostgres
     ):
         """Should query both sources through linker with consistent results."""
-        result_a = query("source_a", resolution="linker_ab", threshold=80, limit=None)
-        result_b = query("source_b", resolution="linker_ab", threshold=80, limit=None)
+        result_a = query(
+            sources=["source_a"], resolution="linker_ab", threshold=80, limit=None
+        )
+        result_b = query(
+            sources=["source_b"], resolution="linker_ab", threshold=80, limit=None
+        )
 
         # Both should return their respective key counts
         assert result_a.shape[0] == 6  # source_a keys
@@ -1338,7 +1352,7 @@ def test_query_both_sources_through_linker(
 
     def test_query_with_limit(self, populated_postgres_db: MatchboxPostgres):
         """Should respect limit parameter."""
-        result = query("source_a", resolution=None, threshold=None, limit=3)
+        result = query(sources=["source_a"], resolution=None, threshold=None, limit=3)
 
         # Should return only 3 rows
         assert result.shape[0] == 3
@@ -1350,7 +1364,9 @@ def test_query_multiple_keys_per_cluster_scenario(
     ):
         """Should handle case where multiple keys belong to same cluster."""
         # This tests the scenario causing your test failure
-        result = query("source_a", resolution="dedupe_a", threshold=80, limit=None)
+        result = query(
+            sources=["source_a"], resolution="dedupe_a", threshold=80, limit=None
+        )
 
         # source_a has 6 keys but some share clusters:
         # - keys 1,2 both in cluster 101 → both map to C301
diff --git a/test/server/test_adapter.py b/test/server/test_adapter.py
index 9c5de0530..cffadabd3 100644
--- a/test/server/test_adapter.py
+++ b/test/server/test_adapter.py
@@ -75,7 +75,7 @@ def test_validate_ids(self):
             crn_testkit = dag.sources.get("crn")
 
             df_crn = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution="naive_test.crn",
             )
 
@@ -92,7 +92,7 @@ def test_validate_hashes(self):
             crn_testkit = dag.sources.get("crn")
 
             df_crn = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution="naive_test.crn",
             )
 
@@ -112,7 +112,7 @@ def test_cluster_id_to_hash(self):
             crn_testkit = dag.sources.get("crn")
 
             df_crn = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution="naive_test.crn",
             )
 
@@ -294,7 +294,7 @@ def test_model_results_basic(self):
             # Query returns the same results as the testkit, showing
             # that processing was performed accurately
             res = self.backend.query(
-                source=dag.sources["crn"].source_config.name,
+                sources=[dag.sources["crn"].source_config.name],
                 resolution="naive_test.crn",
             )
             res_clusters = query_to_cluster_entities(
@@ -350,7 +350,7 @@ def test_model_results_probabilistic(self):
             # Query returns the same results as the testkit, showing
             # that processing was performed accurately
             res = self.backend.query(
-                source=dag.sources["crn"].source_config.name,
+                sources=[dag.sources["crn"].source_config.name],
                 resolution="probabilistic_test.crn",
             )
             res_clusters = query_to_cluster_entities(
@@ -553,14 +553,14 @@ def test_query_only_source(self):
             crn_testkit = dag.sources.get("crn")
 
             df_crn_sample = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 limit=10,
             )
 
             assert isinstance(df_crn_sample, pa.Table)
             assert df_crn_sample.num_rows == 10
 
-            df_crn_full = self.backend.query(source=crn_testkit.source_config.name)
+            df_crn_full = self.backend.query(sources=[crn_testkit.source_config.name])
 
             assert df_crn_full.num_rows == crn_testkit.query.num_rows
             assert df_crn_full.schema.equals(SCHEMA_QUERY)
@@ -571,7 +571,7 @@ def test_query_return_leaf_ids(self):
             crn_testkit = dag.sources.get("crn")
 
             df_crn_full = self.backend.query(
-                source=crn_testkit.source_config.name, return_leaf_id=True
+                sources=[crn_testkit.source_config.name], return_leaf_id=True
             )
 
             assert df_crn_full.num_rows == crn_testkit.query.num_rows
@@ -583,7 +583,7 @@ def test_query_with_dedupe_model(self):
             crn_testkit = dag.sources.get("crn")
 
             df_crn = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution="naive_test.crn",
             )
 
@@ -607,7 +607,7 @@ def test_query_with_link_model(self):
             duns_testkit = dag.sources.get("duns")
 
             df_crn = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution=linker_name,
             )
 
@@ -616,7 +616,7 @@ def test_query_with_link_model(self):
             assert df_crn.schema.equals(SCHEMA_QUERY)
 
             df_duns = self.backend.query(
-                source=duns_testkit.source_config.name,
+                sources=[duns_testkit.source_config.name],
                 resolution=linker_name,
             )
 
@@ -644,7 +644,7 @@ def test_threshold_query_with_link_model(self):
             cdms_testkit = dag.sources.get("cdms")
 
             df_crn = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution=linker_name,
             )
 
@@ -653,7 +653,7 @@ def test_threshold_query_with_link_model(self):
             assert df_crn.schema.equals(SCHEMA_QUERY)
 
             df_cdms = self.backend.query(
-                source=cdms_testkit.source_config.name,
+                sources=[cdms_testkit.source_config.name],
                 resolution=linker_name,
             )
 
@@ -667,12 +667,12 @@ def test_threshold_query_with_link_model(self):
 
             # Test query with threshold
             df_crn_threshold = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution=linker_name,
                 threshold=100,
             )
             df_cdms_threshold = self.backend.query(
-                source=cdms_testkit.source_config.name,
+                sources=[cdms_testkit.source_config.name],
                 resolution=linker_name,
                 threshold=100,
             )
@@ -889,7 +889,7 @@ def get_counts():
 
             # Get some specific IDs to verify they're restored properly
             df_crn_before = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution="naive_test.crn",
             )
             sample_ids_before = df_crn_before["id"].to_pylist()[:5]  # Take first 5 IDs
@@ -909,7 +909,7 @@ def get_counts():
 
             # Verify specific data was restored correctly
             df_crn_after = self.backend.query(
-                source=crn_testkit.source_config.name,
+                sources=[crn_testkit.source_config.name],
                 resolution="naive_test.crn",
             )
             sample_ids_after = df_crn_after["id"].to_pylist()[:5]  # Take first 5 IDs
@@ -939,10 +939,10 @@ def test_insert_and_get_judgement(self):
 
             # Do some queries to find real source cluster IDs
             deduped_query = pl.from_arrow(
-                self.backend.query(source="crn", resolution="naive_test.crn")
+                self.backend.query(sources=["crn"], resolution="naive_test.crn")
             )
             unique_ids = deduped_query["id"].unique()
-            all_leaves = pl.from_arrow(self.backend.query(source="crn"))
+            all_leaves = pl.from_arrow(self.backend.query(sources=["crn"]))
 
             def get_leaf_ids(cluster_id: int) -> list[int]:
                 return (
@@ -1098,9 +1098,9 @@ def test_sample_for_eval(self):
             # We now look at more interesting cases
             # Query backend to form expectations
             resolution_clusters = pl.from_arrow(
-                self.backend.query(source="crn", resolution="naive_test.crn")
+                self.backend.query(sources=["crn"], resolution="naive_test.crn")
             )
-            source_clusters = pl.from_arrow(self.backend.query(source="crn"))
+            source_clusters = pl.from_arrow(self.backend.query(sources=["crn"]))
             # We can request more than available
             assert len(resolution_clusters["id"].unique()) < 99
 
@@ -1111,8 +1111,11 @@ def test_sample_for_eval(self):
             assert samples_99.schema.equals(SCHEMA_EVAL_SAMPLES)
 
             # We can reconstruct the expected sample from resolution and source queries
+            # Select only needed columns from source_clusters to avoid column collision
             expected_sample = (
-                resolution_clusters.join(source_clusters, on="key", suffix="_source")
+                resolution_clusters.join(
+                    source_clusters.select(["id", "key"]), on="key", suffix="_source"
+                )
                 .rename({"id": "root", "id_source": "leaf"})
                 .with_columns(pl.lit("crn").alias("source"))
             )
diff --git a/uv.lock b/uv.lock
index f25396101..d1f21b453 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2756,27 +2756,28 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.12.7"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a1/81/0bd3594fa0f690466e41bd033bdcdf86cba8288345ac77ad4afbe5ec743a/ruff-0.12.7.tar.gz", hash = "sha256:1fc3193f238bc2d7968772c82831a4ff69252f673be371fb49663f0068b7ec71", size = 5197814 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/e1/d2/6cb35e9c85e7a91e8d22ab32ae07ac39cc34a71f1009a6f9e4a2a019e602/ruff-0.12.7-py3-none-linux_armv6l.whl", hash = "sha256:76e4f31529899b8c434c3c1dede98c4483b89590e15fb49f2d46183801565303", size = 11852189 },
-    { url = "https://files.pythonhosted.org/packages/63/5b/a4136b9921aa84638f1a6be7fb086f8cad0fde538ba76bda3682f2599a2f/ruff-0.12.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:789b7a03e72507c54fb3ba6209e4bb36517b90f1a3569ea17084e3fd295500fb", size = 12519389 },
-    { url = "https://files.pythonhosted.org/packages/a8/c9/3e24a8472484269b6b1821794141f879c54645a111ded4b6f58f9ab0705f/ruff-0.12.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e1c2a3b8626339bb6369116e7030a4cf194ea48f49b64bb505732a7fce4f4e3", size = 11743384 },
-    { url = "https://files.pythonhosted.org/packages/26/7c/458dd25deeb3452c43eaee853c0b17a1e84169f8021a26d500ead77964fd/ruff-0.12.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32dec41817623d388e645612ec70d5757a6d9c035f3744a52c7b195a57e03860", size = 11943759 },
-    { url = "https://files.pythonhosted.org/packages/7f/8b/658798472ef260ca050e400ab96ef7e85c366c39cf3dfbef4d0a46a528b6/ruff-0.12.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47ef751f722053a5df5fa48d412dbb54d41ab9b17875c6840a58ec63ff0c247c", size = 11654028 },
-    { url = "https://files.pythonhosted.org/packages/a8/86/9c2336f13b2a3326d06d39178fd3448dcc7025f82514d1b15816fe42bfe8/ruff-0.12.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a828a5fc25a3efd3e1ff7b241fd392686c9386f20e5ac90aa9234a5faa12c423", size = 13225209 },
-    { url = "https://files.pythonhosted.org/packages/76/69/df73f65f53d6c463b19b6b312fd2391dc36425d926ec237a7ed028a90fc1/ruff-0.12.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5726f59b171111fa6a69d82aef48f00b56598b03a22f0f4170664ff4d8298efb", size = 14182353 },
-    { url = "https://files.pythonhosted.org/packages/58/1e/de6cda406d99fea84b66811c189b5ea139814b98125b052424b55d28a41c/ruff-0.12.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74e6f5c04c4dd4aba223f4fe6e7104f79e0eebf7d307e4f9b18c18362124bccd", size = 13631555 },
-    { url = "https://files.pythonhosted.org/packages/6f/ae/625d46d5164a6cc9261945a5e89df24457dc8262539ace3ac36c40f0b51e/ruff-0.12.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0bfe4e77fba61bf2ccadf8cf005d6133e3ce08793bbe870dd1c734f2699a3e", size = 12667556 },
-    { url = "https://files.pythonhosted.org/packages/55/bf/9cb1ea5e3066779e42ade8d0cd3d3b0582a5720a814ae1586f85014656b6/ruff-0.12.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06bfb01e1623bf7f59ea749a841da56f8f653d641bfd046edee32ede7ff6c606", size = 12939784 },
-    { url = "https://files.pythonhosted.org/packages/55/7f/7ead2663be5627c04be83754c4f3096603bf5e99ed856c7cd29618c691bd/ruff-0.12.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e41df94a957d50083fd09b916d6e89e497246698c3f3d5c681c8b3e7b9bb4ac8", size = 11771356 },
-    { url = "https://files.pythonhosted.org/packages/17/40/a95352ea16edf78cd3a938085dccc55df692a4d8ba1b3af7accbe2c806b0/ruff-0.12.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4000623300563c709458d0ce170c3d0d788c23a058912f28bbadc6f905d67afa", size = 11612124 },
-    { url = "https://files.pythonhosted.org/packages/4d/74/633b04871c669e23b8917877e812376827c06df866e1677f15abfadc95cb/ruff-0.12.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:69ffe0e5f9b2cf2b8e289a3f8945b402a1b19eff24ec389f45f23c42a3dd6fb5", size = 12479945 },
-    { url = "https://files.pythonhosted.org/packages/be/34/c3ef2d7799c9778b835a76189c6f53c179d3bdebc8c65288c29032e03613/ruff-0.12.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a07a5c8ffa2611a52732bdc67bf88e243abd84fe2d7f6daef3826b59abbfeda4", size = 12998677 },
-    { url = "https://files.pythonhosted.org/packages/77/ab/aca2e756ad7b09b3d662a41773f3edcbd262872a4fc81f920dc1ffa44541/ruff-0.12.7-py3-none-win32.whl", hash = "sha256:c928f1b2ec59fb77dfdf70e0419408898b63998789cc98197e15f560b9e77f77", size = 11756687 },
-    { url = "https://files.pythonhosted.org/packages/b4/71/26d45a5042bc71db22ddd8252ca9d01e9ca454f230e2996bb04f16d72799/ruff-0.12.7-py3-none-win_amd64.whl", hash = "sha256:9c18f3d707ee9edf89da76131956aba1270c6348bfee8f6c647de841eac7194f", size = 12912365 },
-    { url = "https://files.pythonhosted.org/packages/4c/9b/0b8aa09817b63e78d94b4977f18b1fcaead3165a5ee49251c5d5c245bb2d/ruff-0.12.7-py3-none-win_arm64.whl", hash = "sha256:dfce05101dbd11833a0776716d5d1578641b7fddb537fe7fa956ab85d1769b69", size = 11982083 },
+version = "0.12.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4a/45/2e403fa7007816b5fbb324cb4f8ed3c7402a927a0a0cb2b6279879a8bfdc/ruff-0.12.9.tar.gz", hash = "sha256:fbd94b2e3c623f659962934e52c2bea6fc6da11f667a427a368adaf3af2c866a", size = 5254702 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ad/20/53bf098537adb7b6a97d98fcdebf6e916fcd11b2e21d15f8c171507909cc/ruff-0.12.9-py3-none-linux_armv6l.whl", hash = "sha256:fcebc6c79fcae3f220d05585229463621f5dbf24d79fdc4936d9302e177cfa3e", size = 11759705 },
+    { url = "https://files.pythonhosted.org/packages/20/4d/c764ee423002aac1ec66b9d541285dd29d2c0640a8086c87de59ebbe80d5/ruff-0.12.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aed9d15f8c5755c0e74467731a007fcad41f19bcce41cd75f768bbd687f8535f", size = 12527042 },
+    { url = "https://files.pythonhosted.org/packages/8b/45/cfcdf6d3eb5fc78a5b419e7e616d6ccba0013dc5b180522920af2897e1be/ruff-0.12.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5b15ea354c6ff0d7423814ba6d44be2807644d0c05e9ed60caca87e963e93f70", size = 11724457 },
+    { url = "https://files.pythonhosted.org/packages/72/e6/44615c754b55662200c48bebb02196dbb14111b6e266ab071b7e7297b4ec/ruff-0.12.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d596c2d0393c2502eaabfef723bd74ca35348a8dac4267d18a94910087807c53", size = 11949446 },
+    { url = "https://files.pythonhosted.org/packages/fd/d1/9b7d46625d617c7df520d40d5ac6cdcdf20cbccb88fad4b5ecd476a6bb8d/ruff-0.12.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b15599931a1a7a03c388b9c5df1bfa62be7ede6eb7ef753b272381f39c3d0ff", size = 11566350 },
+    { url = "https://files.pythonhosted.org/packages/59/20/b73132f66f2856bc29d2d263c6ca457f8476b0bbbe064dac3ac3337a270f/ruff-0.12.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d02faa2977fb6f3f32ddb7828e212b7dd499c59eb896ae6c03ea5c303575756", size = 13270430 },
+    { url = "https://files.pythonhosted.org/packages/a2/21/eaf3806f0a3d4c6be0a69d435646fba775b65f3f2097d54898b0fd4bb12e/ruff-0.12.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:17d5b6b0b3a25259b69ebcba87908496e6830e03acfb929ef9fd4c58675fa2ea", size = 14264717 },
+    { url = "https://files.pythonhosted.org/packages/d2/82/1d0c53bd37dcb582b2c521d352fbf4876b1e28bc0d8894344198f6c9950d/ruff-0.12.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:72db7521860e246adbb43f6ef464dd2a532ef2ef1f5dd0d470455b8d9f1773e0", size = 13684331 },
+    { url = "https://files.pythonhosted.org/packages/3b/2f/1c5cf6d8f656306d42a686f1e207f71d7cebdcbe7b2aa18e4e8a0cb74da3/ruff-0.12.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a03242c1522b4e0885af63320ad754d53983c9599157ee33e77d748363c561ce", size = 12739151 },
+    { url = "https://files.pythonhosted.org/packages/47/09/25033198bff89b24d734e6479e39b1968e4c992e82262d61cdccaf11afb9/ruff-0.12.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fc83e4e9751e6c13b5046d7162f205d0a7bac5840183c5beebf824b08a27340", size = 12954992 },
+    { url = "https://files.pythonhosted.org/packages/52/8e/d0dbf2f9dca66c2d7131feefc386523404014968cd6d22f057763935ab32/ruff-0.12.9-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:881465ed56ba4dd26a691954650de6ad389a2d1fdb130fe51ff18a25639fe4bb", size = 12899569 },
+    { url = "https://files.pythonhosted.org/packages/a0/bd/b614d7c08515b1428ed4d3f1d4e3d687deffb2479703b90237682586fa66/ruff-0.12.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:43f07a3ccfc62cdb4d3a3348bf0588358a66da756aa113e071b8ca8c3b9826af", size = 11751983 },
+    { url = "https://files.pythonhosted.org/packages/58/d6/383e9f818a2441b1a0ed898d7875f11273f10882f997388b2b51cb2ae8b5/ruff-0.12.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:07adb221c54b6bba24387911e5734357f042e5669fa5718920ee728aba3cbadc", size = 11538635 },
+    { url = "https://files.pythonhosted.org/packages/20/9c/56f869d314edaa9fc1f491706d1d8a47747b9d714130368fbd69ce9024e9/ruff-0.12.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f5cd34fabfdea3933ab85d72359f118035882a01bff15bd1d2b15261d85d5f66", size = 12534346 },
+    { url = "https://files.pythonhosted.org/packages/bd/4b/d8b95c6795a6c93b439bc913ee7a94fda42bb30a79285d47b80074003ee7/ruff-0.12.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f6be1d2ca0686c54564da8e7ee9e25f93bdd6868263805f8c0b8fc6a449db6d7", size = 13017021 },
+    { url = "https://files.pythonhosted.org/packages/c7/c1/5f9a839a697ce1acd7af44836f7c2181cdae5accd17a5cb85fcbd694075e/ruff-0.12.9-py3-none-win32.whl", hash = "sha256:cc7a37bd2509974379d0115cc5608a1a4a6c4bff1b452ea69db83c8855d53f93", size = 11734785 },
+    { url = "https://files.pythonhosted.org/packages/fa/66/cdddc2d1d9a9f677520b7cfc490d234336f523d4b429c1298de359a3be08/ruff-0.12.9-py3-none-win_amd64.whl", hash = "sha256:6fb15b1977309741d7d098c8a3cb7a30bc112760a00fb6efb7abc85f00ba5908", size = 12840654 },
+    { url = "https://files.pythonhosted.org/packages/ac/fd/669816bc6b5b93b9586f3c1d87cd6bc05028470b3ecfebb5938252c47a35/ruff-0.12.9-py3-none-win_arm64.whl", hash = "sha256:63c8c819739d86b96d500cce885956a1a48ab056bbcbc61b747ad494b2485089", size = 11949623 },
 ]
 
 [[package]]