From 0c12db6114ce4be74719444d2677b601fb1c2d4f Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:04:13 -0700 Subject: [PATCH 01/40] add docker files --- docker/Dockerfile.api | 23 ++++++++++++ docker/Dockerfile.studio | 44 +++++++++++++++++++++++ docker/docker-compose.yml | 74 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 docker/Dockerfile.api create mode 100644 docker/Dockerfile.studio create mode 100644 docker/docker-compose.yml diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api new file mode 100644 index 000000000..24e375610 --- /dev/null +++ b/docker/Dockerfile.api @@ -0,0 +1,23 @@ +FROM python:3.12 + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Install Poetry +RUN pip install --no-cache-dir poetry + +# Copy only requirements to cache them in docker layer +COPY pyproject.toml poetry.lock* ./ + +# Project initialization: +RUN poetry config virtualenvs.create false \ + && poetry install --no-interaction --no-ansi + +# Copy project +COPY src . + +CMD ["python", "-m", "ell.api"] diff --git a/docker/Dockerfile.studio b/docker/Dockerfile.studio new file mode 100644 index 000000000..760a6c84d --- /dev/null +++ b/docker/Dockerfile.studio @@ -0,0 +1,44 @@ +# Start with a Node.js base image for building the React app +FROM node:20 AS client-builder + +WORKDIR /app/ell-studio + +# Copy package.json and package-lock.json (if available) +COPY ell-studio/package.json ell-studio/package-lock.json* ./ + +# Install dependencies +RUN npm ci + +# Copy the rest of the client code +COPY ell-studio . 
+ +# Build the React app +RUN npm run build + +# Now, start with the Python base image +FROM python:3.12 + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Install Poetry +RUN pip install --no-cache-dir poetry + +# Copy only requirements to cache them in docker layer +COPY pyproject.toml poetry.lock* ./ + +# Project initialization: +RUN poetry config virtualenvs.create false \ + && poetry install --no-interaction --no-ansi + +# Copy the Python project +COPY src . + +# Copy the built React app from the client-builder stage +COPY --from=client-builder /app/ell-studio/build /app/ell/studio/static + +CMD ["python", "-m", "ell.studio"] \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 000000000..bcd2ee930 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,74 @@ +services: + api: + build: + context: . + dockerfile: Dockerfile.api + tags: + - ell-api + + ports: + - "8081:8081" + environment: + - HOST=0.0.0.0 + - PORT=8081 + - ELL_PG_CONNECTION_STRING=postgresql://ell_user:ell_password@postgres:5432/ell_db + - ELL_MQTT_CONNECTION_STRING=mqtt://mqtt:1883 + - LOG_LEVEL=DEBUG + depends_on: + - postgres + - mqtt + + studio: + build: + context: . 
+ dockerfile: Dockerfile.studio + tags: + - ell-studio + ports: + - "8080:8080" + environment: + - HOST=0.0.0.0 + - PORT=8080 # currently doesn't take effect -- cli defaults it + - ELL_PG_CONNECTION_STRING=postgresql://ell_user:ell_password@postgres:5432/ell_db + - ELL_MQTT_CONNECTION_STRING=mqtt://mqtt:1883 + depends_on: + - postgres + - mqtt + develop: + watch: + - action: sync+restart + path: ./src/ell/studio + target: /app/ell/studio + + mqtt: + image: eclipse-mosquitto:latest + ports: + - "1883:1883" + command: mosquitto -c /mosquitto/config/mosquitto.conf + volumes: + - mosquitto_config:/mosquitto/config + depends_on: + - mqtt-config + + mqtt-config: + image: busybox + volumes: + - mosquitto_config:/mosquitto/config + command: > + sh -c "echo 'listener 1883' > /mosquitto/config/mosquitto.conf && + echo 'allow_anonymous true' >> /mosquitto/config/mosquitto.conf" + + postgres: + image: postgres:16 + environment: + - POSTGRES_USER=ell_user + - POSTGRES_PASSWORD=ell_password + - POSTGRES_DB=ell_db + volumes: + - postgres_data:/var/lib/postgresql/data + ports: + - "5432:5432" + +volumes: + postgres_data: + mosquitto_config: \ No newline at end of file From 2a36678d74eff8e8851e267e3156f96092c0d95e Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:06:50 -0700 Subject: [PATCH 02/40] add serialization types --- src/ell/types/serialize.py | 116 +++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 src/ell/types/serialize.py diff --git a/src/ell/types/serialize.py b/src/ell/types/serialize.py new file mode 100644 index 000000000..40ad0a02d --- /dev/null +++ b/src/ell/types/serialize.py @@ -0,0 +1,116 @@ +import uuid +from datetime import datetime, timezone +from functools import cached_property +from typing import Any, Dict, List, Optional, Union + +from openai import BaseModel +from pydantic import AwareDatetime, Field + +from ell.types.lmp import LMPType +from ell.types.message import Message + + +def utc_now() -> 
datetime: + """ + Returns the current UTC timestamp. + Serializes to ISO-8601. + """ + return datetime.now(tz=timezone.utc) + + +class WriteLMPInput(BaseModel): + """ + Arguments to write a LMP. + """ + lmp_id: str + name: str + source: str + dependencies: str + lmp_type: LMPType + api_params: Optional[Dict[str, Any]] = None + initial_free_vars: Optional[Dict[str, Any]] = None + initial_global_vars: Optional[Dict[str, Any]] = None + + # this is omitted so as to not confuse whether the number should be incremented (should always happen at the db level) + # num_invocations: Optional[int] = None + commit_message: Optional[str] = None + version_number: Optional[int] = None + created_at: Optional[AwareDatetime] = Field(default_factory=utc_now) + + +class LMP(BaseModel): + lmp_id: str + name: str + source: str + dependencies: str + lmp_type: LMPType + api_params: Optional[Dict[str, Any]] + initial_free_vars: Optional[Dict[str, Any]] + initial_global_vars: Optional[Dict[str, Any]] + created_at: AwareDatetime + version_number: int + commit_message: Optional[str] + num_invocations: int + + +GetLMPResponse = Optional[LMP] + +InvocationResults = Union[List[Message], Any] + + +class InvocationContents(BaseModel): + invocation_id: str = Field(default_factory=lambda: str(uuid.uuid4())) + params: Optional[Dict[str, Any]] = None + results: Optional[InvocationResults] = None + invocation_api_params: Optional[Dict[str, Any]] = None + global_vars: Optional[Dict[str, Any]] = None + free_vars: Optional[Dict[str, Any]] = None + is_external: bool = Field(default=False) + + @cached_property + def total_size_bytes(self) -> int: + """ + Returns the total uncompressed size of the invocation contents as JSON in bytes. 
+ """ + import json + json_fields = [ + self.params, + self.results, + self.invocation_api_params, + self.global_vars, + self.free_vars + ] + return sum(len(json.dumps(field, default=(lambda x: x.model_dump_json() if isinstance(x, BaseModel) else str(x))).encode('utf-8')) for field in json_fields if field is not None) + + @cached_property + def should_externalize(self) -> bool: + return self.total_size_bytes > 102400 # Precisely 100kb in bytes + + +class Invocation(BaseModel): + """ + An invocation of an LMP. + """ + id: Optional[str] = None + lmp_id: str + latency_ms: int + prompt_tokens: Optional[int] = None + completion_tokens: Optional[int] = None + state_cache_key: Optional[str] = None + created_at: AwareDatetime = Field(default_factory=utc_now) + used_by_id: Optional[str] = None + contents: InvocationContents + + +class WriteInvocationInput(BaseModel): + """ + Arguments to write an invocation. + """ + invocation: Invocation + consumes: List[str] + + +class LMPInvokedEvent(BaseModel): + lmp_id: str + # invocation_id: str + consumes: List[str] From 1880d663d59c01f9e3bba8173eb745469802ae79 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:18:22 -0700 Subject: [PATCH 03/40] add get_lmp method to store interface --- src/ell/stores/store.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/ell/stores/store.py b/src/ell/stores/store.py index 408513cef..8390b9804 100644 --- a/src/ell/stores/store.py +++ b/src/ell/stores/store.py @@ -1,11 +1,18 @@ from abc import ABC, abstractmethod from contextlib import contextmanager from datetime import datetime -from typing import Any, Optional, Dict, List, Set, Union +from typing import Any, Optional, Dict, List, Set, Union, TYPE_CHECKING + from ell.types._lstr import _lstr from ell.stores.studio import SerializedLMP, Invocation from ell.types.message import InvocableLM +if TYPE_CHECKING: + from sqlmodel import Session +else: + Session = None + + class BlobStore(ABC): 
@abstractmethod def store_blob(self, blob: bytes, blob_id : str) -> str: @@ -66,6 +73,12 @@ def get_versions_by_fqn(self, fqn :str) -> List[SerializedLMP]: """ pass + @abstractmethod + def get_lmp(self, lmp_id: str, session: Optional[Session] = None) -> Optional[SerializedLMP]: + """ + Get an LMP by its id. + """ + pass @contextmanager def freeze(self, *lmps: InvocableLM): From 8d5e72f0554b9d8f3aaa74e0dcccea3fbe7e167d Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:19:09 -0700 Subject: [PATCH 04/40] implement get_lmp, support in-memory sqlite for testing --- src/ell/stores/sql.py | 46 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/src/ell/stores/sql.py b/src/ell/stores/sql.py index 3000ef02b..f9e8478cf 100644 --- a/src/ell/stores/sql.py +++ b/src/ell/stores/sql.py @@ -5,17 +5,25 @@ import ell.stores.store from sqlalchemy.sql import text from ell.stores.studio import InvocationTrace, SerializedLMP, Invocation -from sqlalchemy import func, and_ +from sqlalchemy import func, and_, Engine from ell.util.serialization import pydantic_ltype_aware_cattr import gzip import json class SQLStore(ell.stores.store.Store): - def __init__(self, db_uri: str, blob_store: Optional[ell.stores.store.BlobStore] = None): - self.engine = create_engine(db_uri, - json_serializer=lambda obj: json.dumps(pydantic_ltype_aware_cattr.unstructure(obj), - sort_keys=True, default=repr, ensure_ascii=False)) - + def __init__(self, db_uri: str = None, blob_store: Optional[ell.stores.store.BlobStore] = None, + engine: Optional[Engine] = None, ): + if engine is not None: + self.engine = engine + elif db_uri is None: + raise ValueError( + "db_uri cannot be None when engine is not provided as an argument") + else: + self.engine = create_engine(db_uri, + json_serializer=lambda obj: json.dumps( + pydantic_ltype_aware_cattr.unstructure(obj), + sort_keys=True, default=repr, ensure_ascii=False)) + 
SQLModel.metadata.create_all(self.engine) self.open_files: Dict[str, Dict[str, Any]] = {} super().__init__(blob_store) @@ -92,7 +100,13 @@ def get_latest_lmps(self, session: Session, skip: int = 0, limit: int = 10) -> L return self.get_lmps(session, skip=skip, limit=limit, subquery=subquery, **filters) - + def get_lmp(self, lmp_id: str, session: Optional[Session] = None) -> Optional[SerializedLMP]: + if session is None: + with Session(self.engine) as session: + return session.exec(select(SerializedLMP).where(SerializedLMP.lmp_id == lmp_id)).first() + else: + return session.exec(select(SerializedLMP).where(SerializedLMP.lmp_id == lmp_id)).first() + def get_lmps(self, session: Session, skip: int = 0, limit: int = 10, subquery=None, **filters: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]: query = select(SerializedLMP) @@ -204,7 +218,23 @@ def get_invocations_aggregate(self, session: Session, lmp_filters: Dict[str, Any class SQLiteStore(SQLStore): def __init__(self, db_dir: str): assert not db_dir.endswith('.db'), "Create store with a directory not a db." - + if ":memory:" in db_dir: + from sqlalchemy.pool import StaticPool + # todo. 
set up blob store for in-memory + engine = create_engine( + 'sqlite://', + connect_args={'check_same_thread': False}, + poolclass=StaticPool, + json_serializer=lambda obj: + json.dumps(pydantic_ltype_aware_cattr.unstructure(obj), + sort_keys=True, + default=repr, + ensure_ascii=False + ) + ) + super().__init__(engine=engine) + return + os.makedirs(db_dir, exist_ok=True) self.db_dir = db_dir db_path = os.path.join(db_dir, 'ell.db') From d70adbd89eabaff35b4bb40d95dbd69e8e835a25 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:42:35 -0700 Subject: [PATCH 05/40] create sqlmodels from serialized --- src/ell/stores/studio.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/ell/stores/studio.py b/src/ell/stores/studio.py index f02f68ed7..3c3e7c387 100644 --- a/src/ell/stores/studio.py +++ b/src/ell/stores/studio.py @@ -1,3 +1,4 @@ +# todo. rename this file sqlmodels because it is not unique to studio from datetime import datetime, timezone import enum from functools import cached_property @@ -19,6 +20,9 @@ from sqlalchemy import Index, func from typing import TypeVar, Any +import ell.types.serialize + + def utc_now() -> datetime: """ @@ -86,6 +90,23 @@ class SerializedLMP(SerializedLMPBase, table=True): ), ) + @staticmethod + def from_api(input: ell.types.serialize.WriteLMPInput): + return SerializedLMP( + lmp_id=input.lmp_id, + lmp_type=input.lmp_type, + name=input.name, + source=input.source, + dependencies=input.dependencies, + api_params=input.api_params, + version_number=input.version_number, + initial_global_vars=input.initial_global_vars, + initial_free_vars=input.initial_free_vars, + commit_message=input.commit_message, + created_at=cast(datetime, input.created_at) + ) + + class Config: table_name = "serializedlmp" unique_together = [("version_number", "name")] @@ -142,6 +163,10 @@ def should_externalize(self) -> bool: class InvocationContents(InvocationContentsBase, table=True): invocation: "Invocation" = 
Relationship(back_populates="contents") + @classmethod + def from_api(cls, input: ell.types.serialize.InvocationContents): + return cls(**input.model_dump()) + class Invocation(InvocationBase, table=True): lmp: SerializedLMP = Relationship(back_populates="invocations") consumed_by: List["Invocation"] = Relationship( @@ -169,3 +194,10 @@ class Invocation(InvocationBase, table=True): Index('ix_invocation_created_at_latency_ms', 'created_at', 'latency_ms'), Index('ix_invocation_created_at_tokens', 'created_at', 'prompt_tokens', 'completion_tokens'), ) + + @classmethod + def from_api(cls, input: ell.types.serialize.Invocation): + return cls( + **input.model_dump(exclude={"contents"}), + contents=InvocationContents.from_api(input.contents) + ) From 1d840fde531eb14aa38a0df79deb1585f869a46e Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:46:18 -0700 Subject: [PATCH 06/40] add serialization clients --- src/ell/api/client.py | 193 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 src/ell/api/client.py diff --git a/src/ell/api/client.py b/src/ell/api/client.py new file mode 100644 index 000000000..27c1706b6 --- /dev/null +++ b/src/ell/api/client.py @@ -0,0 +1,193 @@ +#todo. separate http serializer from sql and all of this from the server +import httpx +from typing import Any, Dict, Optional, Protocol, List +# todo. check this does not cause circularity +from ell.types.serialize import LMP, GetLMPResponse, WriteLMPInput, WriteInvocationInput +from ell.stores.sql import SQLiteStore, PostgresStore +# todo. we should convert the other way and keep store and its dependencies separate from this module +from ell.stores.studio import SerializedLMP, Invocation +import logging +from httpx import HTTPStatusError + + +class EllClient(Protocol): + async def get_lmp(self, lmp_id: str) -> GetLMPResponse: + ... + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + ... 
+ + async def write_invocation(self, input: WriteInvocationInput) -> None: + ... + + async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + ... + + async def retrieve_blob(self, blob_id: str) -> bytes: + ... + + async def close(self): + ... + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + ... + + +# todo. ell http client / api server client? +class EllAPIClient(EllClient): + def __init__(self, base_url: str): + self.base_url = base_url + self.client = httpx.AsyncClient(base_url=base_url) + + async def get_lmp(self, lmp_id: str) -> GetLMPResponse: + response = await self.client.get(f"/lmp/{lmp_id}") + response.raise_for_status() + data = response.json() + if data is None: + return None + return LMP(**data) + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + try: + response = await self.client.post("/lmp", json={ + "lmp": lmp.model_dump(mode="json"), + "uses": uses + }) + response.raise_for_status() + except HTTPStatusError as e: + if e.response.status_code == 422: + error_detail = e.response.json().get("detail", "No detailed error message provided") + logging.error(f"Unprocessable Entity (422) Error: {error_detail}") + raise ValueError(f"Invalid input: {error_detail}") from e + raise + + async def write_invocation(self, input: WriteInvocationInput) -> None: + response = await self.client.post( + "/invocation", + json=input.model_dump(mode="json") + ) + response.raise_for_status() + return None + + async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + response = await self.client.post("/blob", data={ + "blob": blob, + "metadata": metadata + }) + response.raise_for_status() + return response.json()["blob_id"] + + async def retrieve_blob(self, blob_id: str) -> bytes: + response = await self.client.get(f"/blob/{blob_id}") + response.raise_for_status() + return response.content + + async def close(self): + await self.client.aclose() + + async def 
__aenter__(self): + return self + + async def __aexit__(self, exc_type=None, exc=None, tb=None): # `async with` passes the exception triple to __aexit__ + await self.close() + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + response = await self.client.get("/lmp/versions", params={"fqn": fqn}) + response.raise_for_status() + data = response.json() + return [LMP(**lmp_data) for lmp_data in data] + + +class EllSqliteClient(EllClient): + def __init__(self, storage_dir: str): + self.store = SQLiteStore(storage_dir) + + async def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + serialized_lmp = SerializedLMP(**lmp.model_dump()) + self.store.write_lmp(serialized_lmp, uses) + + async def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + self.store.write_invocation( + invocation, + set(input.consumes) + ) + return None + + async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.store_blob(blob, metadata) # NOTE(review): BlobStore.store_blob takes (blob, blob_id); metadata is passed as the id here -- confirm + + async def retrieve_blob(self, blob_id: str) -> bytes: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.retrieve_blob(blob_id) + + async def close(self): + # SQLiteStore doesn't have a close method, so this is a no-op + pass + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type=None, exc=None, tb=None): # `async with` passes the exception triple to __aexit__ + await self.close() + + +# Nb: these are async clients. maybe we want separate sync ones? 
+class EllPostgresClient(EllClient): + def __init__(self, db_uri: str): + self.store = PostgresStore(db_uri) + + async def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + model = SerializedLMP.from_api(lmp) + self.store.write_lmp(model, uses) + + async def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) # WriteInvocationInput has no to_serialized_invocation_input(); convert like EllSqliteClient does + self.store.write_invocation( + invocation, + set(input.consumes) + ) + return None + + async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.store_blob(blob, metadata) # NOTE(review): BlobStore.store_blob takes (blob, blob_id); metadata is passed as the id here -- confirm + + async def retrieve_blob(self, blob_id: str) -> bytes: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.retrieve_blob(blob_id) + + async def close(self): + # todo. Do we have a close method? 
+ pass + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type=None, exc=None, tb=None): # `async with` passes the exception triple to __aexit__ + await self.close() From bc7767157d3237b4ca1fa4d9e9140a72f0cc579e Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:46:41 -0700 Subject: [PATCH 07/40] add api server, tests --- src/ell/api/__init__.py | 0 src/ell/api/config.py | 49 +++++++++ src/ell/api/logger.py | 42 ++++++++ src/ell/api/mqtt_publisher.py | 13 +++ src/ell/api/publisher.py | 13 +++ src/ell/api/server.py | 170 +++++++++++++++++++++++++++++ tests/api/test_api.py | 198 ++++++++++++++++++++++++++++++++++ 7 files changed, 485 insertions(+) create mode 100644 src/ell/api/__init__.py create mode 100644 src/ell/api/config.py create mode 100644 src/ell/api/logger.py create mode 100644 src/ell/api/mqtt_publisher.py create mode 100644 src/ell/api/publisher.py create mode 100644 src/ell/api/server.py create mode 100644 tests/api/test_api.py diff --git a/src/ell/api/__init__.py b/src/ell/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ell/api/config.py b/src/ell/api/config.py new file mode 100644 index 000000000..db121b585 --- /dev/null +++ b/src/ell/api/config.py @@ -0,0 +1,49 @@ +# todo. move this under ell.api.server +from functools import lru_cache +import json +import os +from typing import Any, Optional +from pydantic import BaseModel + +import logging + +logger = logging.getLogger(__name__) + + +# todo. 
maybe we default storage dir and other things in the future to a well-known location +# like ~/.ell or something +@lru_cache(maxsize=1) +def ell_home() -> str: + return os.path.join(os.path.expanduser("~"), ".ell") + + +class Config(BaseModel): + storage_dir: Optional[str] = None + pg_connection_string: Optional[str] = None + mqtt_connection_string: Optional[str] = None + log_level: int = logging.INFO + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + + def model_post_init(self, __context: Any): + # Storage + self.pg_connection_string = self.pg_connection_string or os.getenv( + "ELL_PG_CONNECTION_STRING") + self.storage_dir = self.storage_dir or os.getenv("ELL_STORAGE_DIR") + + # Enforce that we use either sqlite or postgres, but not both + if self.pg_connection_string is not None and self.storage_dir is not None: + raise ValueError("Cannot use both sqlite and postgres") + + # For now, fall back to sqlite if no PostgreSQL connection string is provided + if self.pg_connection_string is None and self.storage_dir is None: + # This intends to honor the default we had set in the CLI + # todo. better default? + self.storage_dir = os.getcwd() + + # Pubsub + self.mqtt_connection_string = self.mqtt_connection_string or os.getenv("ELL_MQTT_CONNECTION_STRING") + + logger.info(f"Resolved config: {json.dumps(self.model_dump(), indent=2)}") + diff --git a/src/ell/api/logger.py b/src/ell/api/logger.py new file mode 100644 index 000000000..c316b3b2e --- /dev/null +++ b/src/ell/api/logger.py @@ -0,0 +1,42 @@ +#todo. 
move under ell.api.server + +import logging +from colorama import Fore, Style, init + +initialized = False +def setup_logging(level: int = logging.INFO): + global initialized + if initialized: + return logging.getLogger("ell") # already configured: hand back the same logger the first call returned instead of None + # Initialize colorama for cross-platform colored output + init(autoreset=True) + + # Create a custom formatter + class ColoredFormatter(logging.Formatter): + FORMATS = { + logging.DEBUG: Fore.CYAN + "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s" + Style.RESET_ALL, + logging.INFO: Fore.GREEN + "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s" + Style.RESET_ALL, + logging.WARNING: Fore.YELLOW + "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s" + Style.RESET_ALL, + logging.ERROR: Fore.RED + "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s" + Style.RESET_ALL, + logging.CRITICAL: Fore.RED + Style.BRIGHT + "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s" + Style.RESET_ALL + } + + def format(self, record): + log_fmt = self.FORMATS.get(record.levelno) + formatter = logging.Formatter(log_fmt, datefmt="%Y-%m-%d %H:%M:%S") + return formatter.format(record) + + # Create and configure the logger + logger = logging.getLogger("ell") + logger.setLevel(level) + + # Create console handler and set formatter + console_handler = logging.StreamHandler() + console_handler.setFormatter(ColoredFormatter()) + + # Add the handler to the logger + logger.addHandler(console_handler) + + initialized = True + + return logger \ No newline at end of file diff --git a/src/ell/api/mqtt_publisher.py b/src/ell/api/mqtt_publisher.py new file mode 100644 index 000000000..09681923f --- /dev/null +++ b/src/ell/api/mqtt_publisher.py @@ -0,0 +1,13 @@ +# nb this is to keep aiomqtt optional and out of everything else +import aiomqtt + +from ell.api.publisher import Publisher + + +class MqttPub(Publisher): + def __init__(self, conn: aiomqtt.Client): + self.mqtt_client = conn + + async def publish(self, topic: str, message: str) -> None: + await self.mqtt_client.publish(topic, message) + 
diff --git a/src/ell/api/publisher.py b/src/ell/api/publisher.py new file mode 100644 index 000000000..09795c542 --- /dev/null +++ b/src/ell/api/publisher.py @@ -0,0 +1,13 @@ +#todo. under api-server / api.server ... maybe? +from abc import ABC, abstractmethod + + +class Publisher(ABC): + @abstractmethod + async def publish(self, topic: str, message: str) -> None: + pass + + +class NoopPublisher(Publisher): + async def publish(self, topic: str, message: str) -> None: + pass \ No newline at end of file diff --git a/src/ell/api/server.py b/src/ell/api/server.py new file mode 100644 index 000000000..d6fa67f75 --- /dev/null +++ b/src/ell/api/server.py @@ -0,0 +1,170 @@ +# todo. under ell.api.server.___main___ +import asyncio +from contextlib import asynccontextmanager +import json +import logging +from typing import List, Optional + +# fixme. get this out of here +import aiomqtt +from fastapi import Depends, FastAPI, HTTPException + +from ell.api.client import EllClient, EllPostgresClient, EllSqliteClient +from ell.api.config import Config +from ell.api.publisher import NoopPublisher, Publisher +from ell.types.serialize import GetLMPResponse, LMPInvokedEvent, WriteInvocationInput, WriteLMPInput, LMP + +logger = logging.getLogger(__name__) + +publisher: Optional[Publisher] = None + + +async def get_publisher(): + yield publisher + + +serializer: Optional[EllClient] = None + + +def init_serializer(config: Config) -> EllClient: + global serializer + if serializer is not None: + return serializer + elif config.pg_connection_string: + try: + from ell.api.client import EllPostgresClient + return EllPostgresClient(config.pg_connection_string) + except ImportError: + # todo. centralize this in util or something, we have it everywhere + raise ImportError( + "Postgres storage is not enabled. Enable it with `pip install -U ell-api[postgres]`. 
More info: https://docs.ell.so/installation") + elif config.storage_dir: + try: + from ell.api.client import EllSqliteClient + return EllSqliteClient(config.storage_dir) # sqlite is keyed by the storage dir; pg_connection_string is always unset in this branch + except ImportError: + raise ImportError( + "SQLite storage is not enabled. Enable it with `pip install -U ell-api[sqlite]`. More info: https://docs.ell.so/installation" + ) + + else: + raise ValueError("No storage configuration found") + + +def get_serializer(): + if serializer is None: + raise ValueError("Serializer not initialized") + return serializer + + +# def get_session(): +# if serializer is None: +# raise ValueError("Serializer not initialized") +# with Session(serializer.engine) as session: +# yield session + + +def create_app(config: Config): + # setup_logging(config.log_level) + + @asynccontextmanager + async def lifespan(app: FastAPI): + global serializer + global publisher + + logger.info("Starting lifespan") + + serializer = init_serializer(config) + + if config.mqtt_connection_string is not None: + try: + from ell.api.mqtt_publisher import MqttPub + except ImportError: + raise ImportError("Missing MQTT dependencies. Install them with `pip install -U ell-ai[mqtt]") + + # fixme. 
have the class do all of this if possible + host, port = config.mqtt_connection_string.split("://")[1].split(":") + + logger.info(f"Connecting to MQTT broker at {host}:{port}") + try: + async with aiomqtt.Client(host, int(port) if port else 1883) as mqtt: + logger.info("Connected to MQTT") + publisher = MqttPub(mqtt) + yield # Allow the app to run + except aiomqtt.MqttError as e: + logger.error(f"Failed to connect to MQTT", exc_info=e) + publisher = None # NOTE(review): if the connect fails before the yield above, this generator never yields and startup errors out -- confirm intended + else: + publisher = NoopPublisher() + yield # allow the app to run + + app = FastAPI( + title="ELL API", + description="API server for ELL", + version="0.1.0", + lifespan=lifespan + ) + + @app.get("/lmp/versions", response_model=List[LMP]) + async def get_lmp_versions( + fqn: str, + serializer: EllClient = Depends(get_serializer)): + return await serializer.get_lmp_versions(fqn) # EllClient.get_lmp_versions is a coroutine; without await the handler returns a coroutine object, not the list + + @app.get("/lmp/{lmp_id}", response_model=GetLMPResponse) + async def get_lmp(lmp_id: str, + serializer: EllClient = Depends(get_serializer), + # todo. figure out the ramifications of doing this here + # session: Session = Depends(get_session) + ): + lmp = await serializer.get_lmp(lmp_id=lmp_id) + if lmp is None: + raise HTTPException(status_code=404, detail="LMP not found") + return lmp + + @app.post("/lmp") + async def write_lmp( + lmp: WriteLMPInput, + # fixme. what is this type supposed to be? 
+ uses: List[str], # SerializedLMPUses, + publisher: Publisher = Depends(get_publisher), + serializer: EllClient = Depends(get_serializer) + ): + await serializer.write_lmp(lmp, uses) + + loop = asyncio.get_event_loop() + loop.create_task( + publisher.publish( + f"lmp/{lmp.lmp_id}/created", + json.dumps({ + "lmp": lmp.model_dump(), + "uses": uses + }, default=str) + ) + ) + + @app.post("/invocation", response_model=WriteInvocationInput) + async def write_invocation( + input: WriteInvocationInput, + publisher: Publisher = Depends(get_publisher), + serializer: EllClient = Depends(get_serializer) + ): + logger.info(f"Writing invocation {input.invocation.lmp_id}") + # TODO: return anything this might create like invocation id + result = await serializer.write_invocation(input) + + loop = asyncio.get_event_loop() + loop.create_task( + publisher.publish( + f"lmp/{input.invocation.lmp_id}/invoked", + LMPInvokedEvent( + lmp_id=input.invocation.lmp_id, + # invocation_id=invo.id, + # todo. return data from write invocation + consumes=[] + ).model_dump_json() + ) + ) + return input + + return app diff --git a/tests/api/test_api.py b/tests/api/test_api.py new file mode 100644 index 000000000..824d03d2d --- /dev/null +++ b/tests/api/test_api.py @@ -0,0 +1,198 @@ +from datetime import timezone +from logging import DEBUG +from uuid import uuid4 +import pytest +from typing import Any, Dict +from fastapi.testclient import TestClient + +from ell.api.client import EllSqliteClient +from ell.api.server import NoopPublisher, create_app, get_publisher, get_serializer +from ell.api.config import Config +from ell.api.logger import setup_logging +from ell.types.serialize import utc_now +from ell.stores.studio import SerializedLMP +from ell.types.lmp import LMPType +from ell.types.serialize import WriteLMPInput + + +@pytest.fixture +def sql_store() -> EllSqliteClient: + return EllSqliteClient(":memory:") + + +def test_construct_serialized_lmp(): + serialized_lmp = SerializedLMP( + 
lmp_id="test_lmp_id", + name="Test LMP", + source="def test_function(): pass", + dependencies=str(["dep1", "dep2"]), + lmp_type=LMPType.LM, + api_params={"param1": "value1"}, + version_number=1, + # uses={"used_lmp_1": {}, "used_lmp_2": {}}, + initial_global_vars={"global_var1": "value1"}, + initial_free_vars={"free_var1": "value2"}, + commit_message="Initial commit", + created_at=utc_now() + ) + assert serialized_lmp.lmp_id == "test_lmp_id" + assert serialized_lmp.name == "Test LMP" + assert serialized_lmp.source == "def test_function(): pass" + assert serialized_lmp.dependencies == str(["dep1", "dep2"]) + assert serialized_lmp.api_params == {"param1": "value1"} + assert serialized_lmp.version_number == 1 + assert serialized_lmp.created_at is not None + + +def test_write_lmp_input(): + # Should be able to construct a WriteLMPInput from data + input = WriteLMPInput( + lmp_id="test_lmp_id", + name="Test LMP", + source="def test_function(): pass", + dependencies=str(["dep1", "dep2"]), + lmp_type=LMPType.LM, + api_params={"param1": "value1"}, + initial_global_vars={"global_var1": "value1"}, + initial_free_vars={"free_var1": "value2"}, + commit_message="Initial commit", + version_number=1, + ) + + # Should default a created_at to utc_now + assert input.created_at is not None + assert input.created_at.tzinfo == timezone.utc + + # Should be able to construct a SerializedLMP from a WriteLMPInput + model = SerializedLMP(**input.model_dump()) + assert model.created_at == input.created_at + + input2 = WriteLMPInput( + lmp_id="test_lmp_id", + name="Test LMP", + source="def test_function(): pass", + dependencies=str(["dep1", "dep2"]), + lmp_type=LMPType.LM, + api_params={"param1": "value1"}, + initial_global_vars={"global_var1": "value1"}, + initial_free_vars={"free_var1": "value2"}, + commit_message="Initial commit", + version_number=1, + # should work with an isoformat string + created_at=utc_now().isoformat() # type: ignore + ) + model2 = 
SerializedLMP(**input2.model_dump()) + assert model2.created_at == input2.created_at + assert input2.created_at is not None + assert input2.created_at.tzinfo == timezone.utc + + +def create_test_app(sql_store: EllSqliteClient): + setup_logging(DEBUG) + config = Config(storage_dir=":memory:") + app = create_app(config) + + publisher = NoopPublisher() + + async def get_publisher_override(): + yield publisher + + + def get_serializer_override(): + return sql_store + + app.dependency_overrides[get_publisher] = get_publisher_override + app.dependency_overrides[get_serializer] = get_serializer_override + + client = TestClient(app) + + return app, client, publisher, config + + +def test_write_lmp(sql_store: EllSqliteClient): + _app, client, *_ = create_test_app(sql_store) + + # fixme. figure out what's going on with `uses` + lmp_data: Dict[str, Any] = { + "lmp_id": uuid4().hex, + "name": "Test LMP", + "source": "def test_function(): pass", + "dependencies": str(["dep1", "dep2"]), + "lmp_type": LMPType.LM, + "api_params": {"param1": "value1"}, + "version_number": 1, + # "uses": {"used_lmp_1": {}, "used_lmp_2": {}}, + "initial_global_vars": {"global_var1": "value1"}, + "initial_free_vars": {"free_var1": "value2"}, + "commit_message": "Initial commit", + "created_at": utc_now().isoformat().replace("+00:00", "Z") + } + # uses: List[str] = {"used_lmp_1": {}, "used_lmp_2": {}}, + + response = client.post( + "/lmp", + json={ + "lmp": lmp_data, + "uses": [] + } + ) + + assert response.status_code == 200 + + lmp = client.get(f"/lmp/{lmp_data['lmp_id']}") + assert lmp.status_code == 200 + # del lmp_data["uses"] + assert lmp.json() == {**lmp_data, "num_invocations": 0} + + +def test_write_invocation(sql_store: EllSqliteClient): + _app, client, *_ = create_test_app(sql_store) + + lmp_id = uuid4().hex + lmp_data: Dict[str, Any] = { + "lmp_id": lmp_id, + "name": "Test LMP", + "source": "def test_function(): pass", + "dependencies": str(["dep1", "dep2"]), + "lmp_type": LMPType.LM, + 
"api_params": {"param1": "value1"}, + } + response = client.post( + "/lmp", + json={'lmp': lmp_data, 'uses': []} + ) + try: + assert response.status_code == 200 + except Exception as e: + print(response.json()) + raise e + + invocation_data = { + "id": uuid4().hex, + "lmp_id": lmp_id, + "args": ["arg1", "arg2"], + "kwargs": {"kwarg1": "value1"}, + "global_vars": {"global_var1": "value1"}, + "free_vars": {"free_var1": "value2"}, + "latency_ms": 100.0, + "invocation_kwargs": {"model": "gpt-4o", "messages": [{"role": "system", "content": "You are a JSON parser. You respond only in JSON. Do not format using markdown."}, {"role": "user", "content": "You are given the following task: \"What is two plus two?\"\n Parse the task into the following type:\n {'$defs': {'Add': {'properties': {'op': {'const': '+', 'enum': ['+'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Add', 'type': 'object'}, 'Div': {'properties': {'op': {'const': '/', 'enum': ['/'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Div', 'type': 'object'}, 'Mul': {'properties': {'op': {'const': '*', 'enum': ['*'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Mul', 'type': 'object'}, 'Sub': {'properties': {'op': {'const': '-', 'enum': ['-'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Sub', 'type': 'object'}}, 'anyOf': [{'$ref': '#/$defs/Add'}, {'$ref': '#/$defs/Sub'}, {'$ref': '#/$defs/Mul'}, {'$ref': '#/$defs/Div'}]}\n "}], "lm_kwargs": {"temperature": 0.1}, "client": None}, + "contents": { } + } + consumes_data = [] + + input = { + "invocation": invocation_data, + "consumes": 
consumes_data + } + response = client.post( + "/invocation", + json=input + ) + + print(response.json()) + assert response.status_code == 200 + # assert response.json() == input + + +if __name__ == "__main__": + pytest.main() From 29dd707e541a48e6f89c9321ff9f72f7a23e2d5b Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 10:47:17 -0700 Subject: [PATCH 08/40] start add api extras --- poetry.lock | 40 +++++++++++++++++++++++++++++++++++----- pyproject.toml | 7 ++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 29315d918..d77ffd90a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,20 @@ # This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +[[package]] +name = "aiomqtt" +version = "2.3.0" +description = "The idiomatic asyncio MQTT client, wrapped around paho-mqtt" +optional = true +python-versions = "<4.0,>=3.8" +files = [ + {file = "aiomqtt-2.3.0-py3-none-any.whl", hash = "sha256:127926717bd6b012d1630f9087f24552eb9c4af58205bc2964f09d6e304f7e63"}, + {file = "aiomqtt-2.3.0.tar.gz", hash = "sha256:312feebe20bc76dc7c20916663011f3bd37aa6f42f9f687a19a1c58308d80d47"}, +] + +[package.dependencies] +paho-mqtt = ">=2.1.0,<3.0.0" +typing-extensions = {version = ">=4.4.0,<5.0.0", markers = "python_version < \"3.10\""} + [[package]] name = "alabaster" version = "0.7.16" @@ -1121,6 +1136,20 @@ files = [ {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] +[[package]] +name = "paho-mqtt" +version = "2.1.0" +description = "MQTT version 5.0/3.1.1 client class" +optional = true +python-versions = ">=3.7" +files = [ + {file = "paho_mqtt-2.1.0-py3-none-any.whl", hash = "sha256:6db9ba9b34ed5bc6b6e3812718c7e06e2fd7444540df2455d2c51bd58808feee"}, + {file = "paho_mqtt-2.1.0.tar.gz", hash = "sha256:12d6e7511d4137555a3f6ea167ae846af2c7357b10bc6fa4f7c3968fc1723834"}, +] + +[package.extras] +proxy = ["pysocks"] + [[package]] 
name = "pathspec" version = "0.12.1" @@ -1576,13 +1605,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rich" -version = "13.9.3" +version = "13.9.4" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = true python-versions = ">=3.8.0" files = [ - {file = "rich-13.9.3-py3-none-any.whl", hash = "sha256:9836f5096eb2172c9e77df411c1b009bace4193d6a481d534fea75ebba758283"}, - {file = "rich-13.9.3.tar.gz", hash = "sha256:bc1e01b899537598cf02579d2b9f4a415104d3fc439313a7a2c165d76557a08e"}, + {file = "rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"}, + {file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"}, ] [package.dependencies] @@ -2399,6 +2428,7 @@ type = ["pytest-mypy"] [extras] all = ["anthropic", "fastapi", "groq", "sqlmodel", "uvicorn"] anthropic = ["anthropic"] +api-server = ["aiomqtt", "fastapi", "uvicorn"] groq = ["groq"] postgres = ["sqlmodel"] sqlite = ["sqlmodel"] @@ -2406,5 +2436,5 @@ studio = ["fastapi", "sqlmodel", "uvicorn"] [metadata] lock-version = "2.0" -python-versions = ">=3.9" -content-hash = "a8e393e75d5f060226d0b25c4306a1d504ac68e92650a7aa34577e396620490d" +python-versions = ">=3.9,<4.0" +content-hash = "49a283549a81a5b13eade523c0cae915a003660185f199a60b21fee249a9e184" diff --git a/pyproject.toml b/pyproject.toml index 1a9991c03..dba1256ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ include = [ ] [tool.poetry.dependencies] -python = ">=3.9" +python = ">=3.9,<4.0" numpy = ">=1.26.0" dill = "^0.3.8" colorama = "^0.4.6" @@ -46,6 +46,9 @@ sqlmodel = { version = ">=0.0.21, <0.1.0", optional = true } # Studio fastapi = { version = "^0.111.1", optional = true } uvicorn = { version = "^0.30.3", optional = true } +# Studio + API Server (optional) +aiomqtt = { version="^2.3.0", optional = true } + 
[tool.poetry.group.dev.dependencies] pytest = "^8.3.2" @@ -60,6 +63,8 @@ groq = ["groq"] sqlite = [ 'sqlmodel' ] postgres = ['sqlmodel', 'psychopg2'] studio = ['fastapi', 'uvicorn', 'sqlmodel'] +api_server = ["fastapi", "uvicorn"] +mqtt = ["aiomqtt"] all = [ "anthropic", "groq", From 59bf4cb97ddbf2452a353ec8f23e49d9ba104400 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 12:04:13 -0700 Subject: [PATCH 09/40] add dockerignore --- .dockerignore | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..a1154a08a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,11 @@ +**/__pycache__/ +.pytest_cache/ +.git +.github +.vscode +.DS_Store +.env +docs +examples +tests +x From bea7d6abafd32622b87d220926b3a20cb5d62f10 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 12:04:51 -0700 Subject: [PATCH 10/40] fix typo --- poetry.lock | 7 ++++--- pyproject.toml | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index d77ffd90a..d01b066fa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2428,13 +2428,14 @@ type = ["pytest-mypy"] [extras] all = ["anthropic", "fastapi", "groq", "sqlmodel", "uvicorn"] anthropic = ["anthropic"] -api-server = ["aiomqtt", "fastapi", "uvicorn"] +api-server = ["fastapi", "uvicorn"] groq = ["groq"] -postgres = ["sqlmodel"] +mqtt = ["aiomqtt"] +postgres = ["psycopg2", "sqlmodel"] sqlite = ["sqlmodel"] studio = ["fastapi", "sqlmodel", "uvicorn"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "49a283549a81a5b13eade523c0cae915a003660185f199a60b21fee249a9e184" +content-hash = "5e05acc71c0666a38c06b7837fa7ecb02e5615dd4bfd5ed7573541730e904456" diff --git a/pyproject.toml b/pyproject.toml index dba1256ae..10382a088 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,9 +61,9 @@ sphinx-rtd-theme = "^2.0.0" anthropic = ["anthropic"] groq = ["groq"] sqlite = [ 
'sqlmodel' ] -postgres = ['sqlmodel', 'psychopg2'] +postgres = ['sqlmodel', 'psycopg2'] studio = ['fastapi', 'uvicorn', 'sqlmodel'] -api_server = ["fastapi", "uvicorn"] +api-server = ["fastapi", "uvicorn"] mqtt = ["aiomqtt"] all = [ "anthropic", From 8e8428d58a70075cd7f1398253e2d13d4ee3b618 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 12:06:50 -0700 Subject: [PATCH 11/40] add docker build arguments with ell extras --- docker/Dockerfile.api | 4 +++- docker/Dockerfile.studio | 4 +++- docker/docker-compose.yml | 12 ++++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api index 24e375610..4d00ff524 100644 --- a/docker/Dockerfile.api +++ b/docker/Dockerfile.api @@ -2,6 +2,8 @@ FROM python:3.12 WORKDIR /app +ARG ELL_EXTRAS="api-server postgres mqtt" + # Install system dependencies RUN apt-get update && apt-get install -y \ gcc \ @@ -15,7 +17,7 @@ COPY pyproject.toml poetry.lock* ./ # Project initialization: RUN poetry config virtualenvs.create false \ - && poetry install --no-interaction --no-ansi + && poetry install --extras="${ELL_EXTRAS}" --no-interaction --no-ansi # Copy project COPY src . diff --git a/docker/Dockerfile.studio b/docker/Dockerfile.studio index 760a6c84d..6c860c822 100644 --- a/docker/Dockerfile.studio +++ b/docker/Dockerfile.studio @@ -3,6 +3,8 @@ FROM node:20 AS client-builder WORKDIR /app/ell-studio +ARG ELL_EXTRAS="studio postgres mqtt" + # Copy package.json and package-lock.json (if available) COPY ell-studio/package.json ell-studio/package-lock.json* ./ @@ -33,7 +35,7 @@ COPY pyproject.toml poetry.lock* ./ # Project initialization: RUN poetry config virtualenvs.create false \ - && poetry install --no-interaction --no-ansi + && poetry install --extras "${ELL_EXTRAS}" --no-interaction --no-ansi # Copy the Python project COPY src . 
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index bcd2ee930..837b4a7db 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,8 +1,10 @@ services: api: build: - context: . - dockerfile: Dockerfile.api + context: .. + dockerfile: docker/Dockerfile.api + args: + ELL_EXTRAS: api-server postgres mqtt tags: - ell-api @@ -20,8 +22,10 @@ services: studio: build: - context: . - dockerfile: Dockerfile.studio + context: .. + dockerfile: docker/Dockerfile.studio + args: + ELL_EXTRAS: studio postgres mqtt tags: - ell-studio ports: From 5298d412029b5d74be46e7cc4115c2adb084c90d Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 12:08:21 -0700 Subject: [PATCH 12/40] add missing api-server main --- src/ell/api/__main__.py | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 src/ell/api/__main__.py diff --git a/src/ell/api/__main__.py b/src/ell/api/__main__.py new file mode 100644 index 000000000..acdfe4089 --- /dev/null +++ b/src/ell/api/__main__.py @@ -0,0 +1,58 @@ +import asyncio +import os +import uvicorn +import logging +from argparse import ArgumentParser + + +from ell.api.config import Config +from ell.api.server import create_app +from ell.api.logger import setup_logging + + + + +def main(): + log_level = os.environ.get("LOG_LEVEL", logging.INFO) + setup_logging(level=log_level) + + parser = ArgumentParser(description="ELL API Server") + parser.add_argument("--storage-dir", default=None, + help="Storage directory (default: None)") + parser.add_argument("--pg-connection-string", default=None, + help="PostgreSQL connection string (default: None)") + parser.add_argument("--mqtt-connection-string", default=None, + help="MQTT connection string (default: None)") + parser.add_argument("--host", default=None, + help="Host to run the server on") + parser.add_argument("--port", type=int, default=None, + help="Port to run the server on") + parser.add_argument("--dev", 
action="store_true", + help="Run in development mode") + args = parser.parse_args() + + config = Config( + storage_dir=args.storage_dir, + pg_connection_string=args.pg_connection_string, + mqtt_connection_string=args.mqtt_connection_string, + ) + + app = create_app(config) + + loop = asyncio.new_event_loop() + + config = uvicorn.Config( + app=app, + host=args.host if args.host else os.environ.get("HOST", "0.0.0.0"), + port=args.port if args.port else int(os.environ.get("PORT", 8081)), + loop=loop # type: ignore + ) + server = uvicorn.Server(config) + + loop.create_task(server.serve()) + + loop.run_forever() + + +if __name__ == "__main__": + main() From ed437698529a53a68efefb48d0b02d1c4a12b49a Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 12:09:29 -0700 Subject: [PATCH 13/40] sidestep package not installed error --- src/ell/__version__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ell/__version__.py b/src/ell/__version__.py index ffc218495..5fb3a6916 100644 --- a/src/ell/__version__.py +++ b/src/ell/__version__.py @@ -1,6 +1,6 @@ -try: - from importlib.metadata import version -except ImportError: - from importlib_metadata import version +from importlib.metadata import version, PackageNotFoundError -__version__ = version("ell-ai") +try: + __version__ = version("ell-ai") +except PackageNotFoundError: + __version__ = "unknown" \ No newline at end of file From c2fa5c7fa1e985da955f9b9415a9c0f63c6745eb Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sat, 2 Nov 2024 12:19:19 -0700 Subject: [PATCH 14/40] fix: import base model from pydantic --- src/ell/types/serialize.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ell/types/serialize.py b/src/ell/types/serialize.py index 40ad0a02d..af6a1357e 100644 --- a/src/ell/types/serialize.py +++ b/src/ell/types/serialize.py @@ -3,8 +3,7 @@ from functools import cached_property from typing import Any, Dict, List, Optional, Union -from openai import 
BaseModel -from pydantic import AwareDatetime, Field +from pydantic import BaseModel, AwareDatetime, Field from ell.types.lmp import LMPType from ell.types.message import Message From d73f17be6614be824949631eb6b18ff0cd9a65c9 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 3 Nov 2024 08:40:15 -0800 Subject: [PATCH 15/40] checkpoint --- src/ell/studio/__main__.py | 7 ++- src/ell/studio/config.py | 6 +- src/ell/studio/mqtt_pubsub.py | 85 ++++++++++++++++++++++++++ src/ell/studio/pubsub.py | 108 ++++++++++++++++++++++++++++++++++ src/ell/studio/server.py | 74 +++++++++++++++++++++-- src/ell/util/pubsub.py | 82 ++++++++++++++++++++++++++ 6 files changed, 353 insertions(+), 9 deletions(-) create mode 100644 src/ell/studio/mqtt_pubsub.py create mode 100644 src/ell/studio/pubsub.py create mode 100644 src/ell/util/pubsub.py diff --git a/src/ell/studio/__main__.py b/src/ell/studio/__main__.py index 484aa18bf..59da2f9cc 100644 --- a/src/ell/studio/__main__.py +++ b/src/ell/studio/__main__.py @@ -36,7 +36,9 @@ def main(): help="Directory for filesystem serializer storage (default: current directory)") parser.add_argument("--pg-connection-string", default=None, help="PostgreSQL connection string (default: None)") - parser.add_argument("--host", default="127.0.0.1", help="Host to run the server on (default: localhost)") + parser.add_argument("--mqtt-connection-string", default=None, + help="MQTT connection string (default: None)") + parser.add_argument("--host", default="0.0.0.0", help="Host to run the server on (default: 0.0.0.0)") parser.add_argument("--port", type=int, default=5555, help="Port to run the server on (default: 5555)") parser.add_argument("--dev", action="store_true", help="Run in development mode") parser.add_argument("--dev-static-dir", default=None, help="Directory to serve static files from in development mode") @@ -50,7 +52,8 @@ def main(): assert args.port == 5555, "Port must be 5000 in development mode" config = 
Config.create(storage_dir=args.storage_dir, - pg_connection_string=args.pg_connection_string) + pg_connection_string=args.pg_connection_string, + mqtt_connection_string=args.mqtt_connection_string) app = create_app(config) if not args.dev: diff --git a/src/ell/studio/config.py b/src/ell/studio/config.py index 851c9c5e5..b252732f5 100644 --- a/src/ell/studio/config.py +++ b/src/ell/studio/config.py @@ -18,12 +18,14 @@ def ell_home() -> str: class Config(BaseModel): pg_connection_string: Optional[str] = None storage_dir: Optional[str] = None + mqtt_connection_string: Optional[str] = None @classmethod def create( cls, storage_dir: Optional[str] = None, pg_connection_string: Optional[str] = None, + mqtt_connection_string: Optional[str] = None, ) -> 'Config': pg_connection_string = pg_connection_string or os.getenv("ELL_PG_CONNECTION_STRING") storage_dir = storage_dir or os.getenv("ELL_STORAGE_DIR") @@ -37,4 +39,6 @@ def create( # This intends to honor the default we had set in the CLI storage_dir = os.getcwd() - return cls(pg_connection_string=pg_connection_string, storage_dir=storage_dir) \ No newline at end of file + mqtt_connection_string = mqtt_connection_string or os.getenv("ELL_MQTT_CONNECTION_STRING") + + return cls(pg_connection_string=pg_connection_string, storage_dir=storage_dir, mqtt_connection_string=mqtt_connection_string) \ No newline at end of file diff --git a/src/ell/studio/mqtt_pubsub.py b/src/ell/studio/mqtt_pubsub.py new file mode 100644 index 000000000..6450f6336 --- /dev/null +++ b/src/ell/studio/mqtt_pubsub.py @@ -0,0 +1,85 @@ +import asyncio +import json +import logging + +import aiomqtt + +from ell.studio.pubsub import WebSocketPubSub, Subscriber + +logger = logging.getLogger(__name__) + + +class MqttWebSocketPubSub(WebSocketPubSub): + mqtt_client: aiomqtt.Client + + def __init__(self, conn: aiomqtt.Client): + super().__init__() + self.mqtt_client = conn + + def listen(self, loop: asyncio.AbstractEventLoop): + self.listener = 
loop.create_task(self._relay_all()) + return self.listener + + async def publish(self, topic: str, message: str) -> None: + # this is a bit sus because we could get in a loop if the message is echoed back + # we're also publishing to mqtt, not websocket clients + await self.mqtt_client.publish(topic, message) + + async def _relay_all(self) -> None: + """ + Relays all messages received on the subscribed MQTT topics to the websocket subscribers on the same topics. + + Example: + self.subscribe("detailed-telemetry/#") # <- Registers us to receive MQTT messages published to detailed-telemetry/1, detailed-telemetry/2, ... + + Upon receipt, we forward these messages to any connected Ell Studio websockets whose subscription matches the published topic. + + i.e.: + Subscriptions map: + "detailed-telemetry/1" -> [socket1] + "detailed-telemetry/2" -> [socket2] + "lmp/#" -> [socket1, socket2] + - An MQTT message published to detailed-telemetry/1 will be relayed to socket1 + - An MQTT message published to lmp/42 will be relayed to socket1 and socket2 + + + """ + logger.info("Starting mqtt listener") + async for message in self.mqtt_client.messages: + try: + logger.debug(f"Received message on topic {message.topic}: {message.payload}") + # Call the websocket's publish method to publish the message received from MQTT to the websocket + await super().publish(str(message.topic), json.loads( + message.payload # type: ignore + )) + except Exception as e: + logger.error(f"Error relaying message: {e}") + + async def subscribe_async(self, topic: str, subscriber: Subscriber) -> None: + await self.mqtt_client.subscribe(topic) + super().subscribe(topic, subscriber) + + +async def setup(mqtt_connection_string: str) -> tuple[MqttWebSocketPubSub, aiomqtt.Client]: # type: ignore + """Setup MQTT PubSub with retry logic.""" + retry_interval_seconds = 1 + retry_max_attempts = 5 + + for attempt in range(retry_max_attempts): + try: + host, port = mqtt_connection_string.split("://")[1].split(":") + 
logger.info(f"Connecting to MQTT broker at {host}:{port}") + + # Create the client - it will connect when used as context manager + mqtt_client = aiomqtt.Client(hostname=host, port=int(port) if port else 1883) + await mqtt_client.__aenter__() + return MqttWebSocketPubSub(mqtt_client), mqtt_client + + except aiomqtt.MqttError as e: + logger.error(f"Failed to connect to MQTT [Attempt {attempt + 1}/{retry_max_attempts}]: {e}") + if attempt < retry_max_attempts - 1: + await asyncio.sleep(retry_interval_seconds) + continue + else: + logger.error("Max retry attempts reached. Unable to connect to MQTT.") + raise ValueError(f"Failed to connect to MQTT after {retry_max_attempts} attempts") from e diff --git a/src/ell/studio/pubsub.py b/src/ell/studio/pubsub.py new file mode 100644 index 000000000..1f00225c4 --- /dev/null +++ b/src/ell/studio/pubsub.py @@ -0,0 +1,108 @@ +from abc import ABC, abstractmethod +import asyncio +import logging +from typing import Any, List + +from fastapi import WebSocket + +from ell.util.pubsub import topic_matches, validate_publish_topic, validate_subscription_pattern + +logger = logging.getLogger(__name__) + +Subscriber = WebSocket + + +class PubSub(ABC): + @abstractmethod + async def publish(self, topic: str, message: str) -> None: + pass + + @abstractmethod + def subscribe(self, topic: str, subscriber: Subscriber) -> None: + pass + + @abstractmethod + async def subscribe_async(self, topic: str, subscriber: Subscriber) -> None: + pass + + @abstractmethod + def unsubscribe(self, topic: str, subscriber: Subscriber): + pass + + @abstractmethod + def unsubscribe_from_all(self, subscriber: Subscriber): + pass + + + @abstractmethod + def get_subscriptions(self, subscriber: Subscriber) -> List[str]: + pass + + +class WebSocketPubSub(PubSub): + def __init__(self): + # Topic pattern -> subscribed websockets + self.subscriptions: dict[str, list[Subscriber]] = {} + # Reverse index for self.subscriptions (websocket -> their subscribed topic patterns) + 
self.subscribers: dict[Subscriber, list[str]] = {} + + async def publish(self, topic: str, message: Any): + validate_publish_topic(topic) + # Notify all subscribers whose subscription pattern is a match for `topic` + subscriptions = self.subscriptions.copy() # copy to avoid mutating while iterating + logger.info(f"Relaying message to socket {topic} subscribers") + for pattern in subscriptions: + if topic_matches(topic, pattern): + for subscriber in subscriptions[pattern]: + asyncio.create_task(subscriber.send_json( + {"topic": topic, "message": message})) + + def subscribe(self, topic_pattern: str, subscriber: Subscriber) -> None: + """Subscribes the websocket `subscriber` to receive messages matching the topic pattern `topic`""" + validate_subscription_pattern(topic_pattern) + logger.info(f"Subscribing ws {subscriber} to {topic_pattern}") + # Add the subscriber to the list for the topic + if topic_pattern not in self.subscriptions: + self.subscriptions[topic_pattern] = [] + self.subscriptions[topic_pattern].append(subscriber) + if subscriber not in self.subscribers: + self.subscribers[subscriber] = [] + self.subscribers[subscriber].append(topic_pattern) + + def unsubscribe(self, topic: str, subscriber: Subscriber): + """Unsubscribes the websocket `subscriber` from the topic pattern `topic`""" + subscriptions = self.subscriptions.copy() + if topic in subscriptions: + # Try to apply the edit to the original subscriptions map + try: + # Remove the subscriber + self.subscriptions[topic].remove(subscriber) + # Prune the topic from the subscriptions map if the edit resulted in 0 subscribers for the topic + if not self.subscriptions[topic]: + del self.subscriptions[topic] + except Exception: + # If anything goes wrong in updating the subscriptions map, we assume it's concurrency-related + # and the current subscriptions map contains the edit we would have made + pass + + def unsubscribe_from_all(self, subscriber: Subscriber): + """Removes the websocket `subscriber` from 
all topics. Typically called on socket disconnect.""" + subscribers = self.subscribers.copy() + subscriber_subscriptions = subscribers[subscriber] + if subscriber_subscriptions: + for topic in subscriber_subscriptions: + self.unsubscribe(topic, subscriber) + try: + del self.subscribers[subscriber] + except KeyError: + pass + + async def subscribe_async(self, topic_pattern: str, subscriber: Subscriber) -> None: + """Subscribes the websocket `subscriber` to receive messages matching the topic pattern `topic`""" + validate_subscription_pattern(topic_pattern) + logger.info(f"Subscribing ws {subscriber} to {topic_pattern}") + self.subscribe(topic_pattern, subscriber) + + def get_subscriptions(self, subscriber: Subscriber) -> List[str]: + """Returns the list of topic patterns that the websocket `subscriber` is subscribed to""" + return self.subscribers.get(subscriber, []) diff --git a/src/ell/studio/server.py b/src/ell/studio/server.py index 2879ea7a2..b9331dda4 100644 --- a/src/ell/studio/server.py +++ b/src/ell/studio/server.py @@ -1,3 +1,5 @@ +import asyncio +from contextlib import asynccontextmanager from typing import Optional, Dict, Any from sqlmodel import Session @@ -14,8 +16,11 @@ from ell.stores.studio import SerializedLMP from datetime import datetime, timedelta from sqlmodel import select +from contextlib import AsyncExitStack +from ell.studio.pubsub import WebSocketPubSub, PubSub + logger = logging.getLogger(__name__) @@ -31,6 +36,33 @@ def get_serializer(config: Config): raise ValueError("No storage configuration found") +pubsub: Optional[PubSub] = None + +async def get_pubsub(): + yield pubsub + + +async def setup_pubsub(config: Config, exit_stack: AsyncExitStack): + """Set up the appropriate pubsub client based on configuration.""" + if config.storage_dir is not None: + return WebSocketPubSub(), None + + if config.mqtt_connection_string is not None: + try: + from ell.studio.mqtt_pubsub import setup + except ImportError as e: + raise ImportError( + 
"Received mqtt_connection_string but dependencies missing. Install with `pip install -U ell-ai[mqtt]. More info: https://docs.ell.so/installation") from e + + pubsub, mqtt_client = await setup(config.mqtt_connection_string) + # await exit_stack.enter_async_context(mqtt_client) + exit_stack.push_async_exit(mqtt_client) + logger.info("Connected to MQTT") + + loop = asyncio.get_event_loop() + return pubsub, pubsub.listen(loop) + + return None, None def create_app(config:Config): serializer = get_serializer(config) @@ -39,7 +71,29 @@ def get_session(): with Session(serializer.engine) as session: yield session - app = FastAPI(title="ell Studio", version=__version__) + @asynccontextmanager + async def lifespan(app: FastAPI): + global pubsub + exit_stack = AsyncExitStack() + pubsub_task = None + + try: + pubsub, pubsub_task = await setup_pubsub(config, exit_stack) + yield + + finally: + if pubsub_task and not pubsub_task.done(): + pubsub_task.cancel() + try: + await pubsub_task + except asyncio.CancelledError: + pass + + await exit_stack.aclose() + pubsub = None + + + app = FastAPI(title="ell Studio", version=__version__, lifespan=lifespan) # Enable CORS for all origins app.add_middleware( @@ -50,17 +104,21 @@ def get_session(): allow_headers=["*"], ) - manager = ConnectionManager() @app.websocket("/ws") - async def websocket_endpoint(websocket: WebSocket): - await manager.connect(websocket) + async def websocket_endpoint(websocket: WebSocket,pubsub: PubSub = Depends(get_pubsub)): + await websocket.accept() + # NB. for now, studio does not dynamically subscribe to data topics. We subscribe every client to these by + # default. If desired, apps may issue a "subscribe" message that we can handle in websocket.receive_text below + # to sign up to receive data from arbitrary topics. They can unsubscribe when done via an "unsubscribe" message. 
+ await pubsub.subscribe_async("all", websocket) + await pubsub.subscribe_async("lmp/#", websocket) try: while True: data = await websocket.receive_text() # Handle incoming WebSocket messages if needed except WebSocketDisconnect: - manager.disconnect(websocket) + pubsub.unsubscribe_from_all(websocket) @app.get("/api/latest/lmps", response_model=list[SerializedLMPWithUses]) @@ -194,9 +252,13 @@ def get_lmp_history( return history + # Used by studio to publish changes from a SQLite store directly async def notify_clients(entity: str, id: Optional[str] = None): + if pubsub is None: + logger.error("No pubsub client, cannot notify clients") + return message = json.dumps({"entity": entity, "id": id}) - await manager.broadcast(message) + await pubsub.publish("all", message) # Add this method to the app object app.notify_clients = notify_clients diff --git a/src/ell/util/pubsub.py b/src/ell/util/pubsub.py new file mode 100644 index 000000000..e24c578b4 --- /dev/null +++ b/src/ell/util/pubsub.py @@ -0,0 +1,82 @@ +from functools import lru_cache +from typing import Optional + +MAX_TOPIC_LENGTH = 65535 + +class TopicMatcher: + def __init__(self): + # Cache validation and matching results + self._validate_publish_topic = lru_cache(maxsize=1024)(self._validate_publish_topic_impl) + self._validate_subscription_pattern = lru_cache(maxsize=1024)(self._validate_subscription_pattern_impl) + self.matches = lru_cache(maxsize=4096)(self.matches) + + def _validate_publish_topic_impl(self, topic: str) -> tuple[bool, Optional[str]]: + """Internal implementation that returns (is_valid, error_message)""" + if not topic: + return False, "Topic cannot be empty" + if len(topic) > MAX_TOPIC_LENGTH: + return False, f"Topic exceeds maximum length of {MAX_TOPIC_LENGTH}" + if "#" in topic or "+" in topic: + return False, "Publish topics cannot contain wildcards (# or +)" + return True, None + + def _validate_subscription_pattern_impl(self, pattern: str) -> tuple[bool, Optional[str]]: + """Internal 
implementation that returns (is_valid, error_message)""" + if not pattern: + return False, "Subscription pattern cannot be empty" + if len(pattern) > MAX_TOPIC_LENGTH: + return False, f"Pattern exceeds maximum length of {MAX_TOPIC_LENGTH}" + if "#/" in pattern: + return False, "Multi-level wildcard (#) must be the last character in the pattern" + + for level in pattern.split("/"): + if len(level) > 1: + if "+" in level: + return False, "Single-level wildcard (+) must be alone in its level" + if "#" in level: + return False, "Multi-level wildcard (#) must be alone in its level" + return True, None + + def validate_publish_topic(self, topic: str) -> None: + """Public method that raises ValueError with specific message if invalid""" + is_valid, error = self._validate_publish_topic(topic) + if not is_valid: + raise ValueError(error) + + def validate_subscription_pattern(self, pattern: str) -> None: + """Public method that raises ValueError with specific message if invalid""" + is_valid, error = self._validate_subscription_pattern(pattern) + if not is_valid: + raise ValueError(error) + + def matches(self, topic: str, pattern: str) -> bool: + """Check if a topic matches a wildcard pattern.""" + # Use cached validation methods + self.validate_publish_topic(topic) + self.validate_subscription_pattern(pattern) + + topic_parts = topic.split("/") + pattern_parts = pattern.split("/") + + # Handle shared subscriptions + if pattern_parts[0] == "$share": + pattern_parts = pattern_parts[2:] + + def match_parts(t_parts: list[str], p_parts: list[str]) -> bool: + if not t_parts: + return not p_parts or p_parts[0] == "#" + if not p_parts: + return False + if p_parts[0] == "#": + return True + if p_parts[0] == "+" or t_parts[0] == p_parts[0]: + return match_parts(t_parts[1:], p_parts[1:]) + return False + + return match_parts(topic_parts, pattern_parts) + +matcher = TopicMatcher() +topic_matches = matcher.matches +validate_publish_topic = matcher.validate_publish_topic 
+validate_subscription_pattern = matcher.validate_subscription_pattern + From 20533ba2a86e81c4bfa2da91e51a8d9b05bc2226 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 3 Nov 2024 10:08:39 -0800 Subject: [PATCH 16/40] default required deps in studio, api server images --- docker/Dockerfile.api | 2 +- docker/Dockerfile.studio | 9 ++++++--- docker/docker-compose.yml | 6 ++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api index 4d00ff524..0a9d118e7 100644 --- a/docker/Dockerfile.api +++ b/docker/Dockerfile.api @@ -17,7 +17,7 @@ COPY pyproject.toml poetry.lock* ./ # Project initialization: RUN poetry config virtualenvs.create false \ - && poetry install --extras="${ELL_EXTRAS}" --no-interaction --no-ansi + && poetry install --extras="api-server ${ELL_EXTRAS}" --no-interaction --no-ansi # Copy project COPY src . diff --git a/docker/Dockerfile.studio b/docker/Dockerfile.studio index 6c860c822..76217e336 100644 --- a/docker/Dockerfile.studio +++ b/docker/Dockerfile.studio @@ -3,8 +3,6 @@ FROM node:20 AS client-builder WORKDIR /app/ell-studio -ARG ELL_EXTRAS="studio postgres mqtt" - # Copy package.json and package-lock.json (if available) COPY ell-studio/package.json ell-studio/package-lock.json* ./ @@ -20,8 +18,13 @@ RUN npm run build # Now, start with the Python base image FROM python:3.12 +ARG ELL_EXTRAS="" + +RUN echo "ELL_EXTRAS=${ELL_EXTRAS}" + WORKDIR /app + # Install system dependencies RUN apt-get update && apt-get install -y \ gcc \ @@ -35,7 +38,7 @@ COPY pyproject.toml poetry.lock* ./ # Project initialization: RUN poetry config virtualenvs.create false \ - && poetry install --extras "${ELL_EXTRAS}" --no-interaction --no-ansi + && poetry install --extras="studio ${ELL_EXTRAS}" --no-interaction --no-ansi # Copy the Python project COPY src . 
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 837b4a7db..1664aa7e0 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,10 +1,12 @@ +name: ell +version: "3.9" services: api: build: context: .. dockerfile: docker/Dockerfile.api args: - ELL_EXTRAS: api-server postgres mqtt + ELL_EXTRAS: postgres mqtt tags: - ell-api @@ -25,7 +27,7 @@ services: context: .. dockerfile: docker/Dockerfile.studio args: - ELL_EXTRAS: studio postgres mqtt + ELL_EXTRAS: postgres mqtt tags: - ell-studio ports: From 918cd4a30b27f5d5e44621de29b8fe04e0e60a1c Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 3 Nov 2024 10:43:51 -0800 Subject: [PATCH 17/40] allow api.pubsub module so code can be shared --- src/ell/api/mqtt_publisher.py | 13 -- src/ell/api/publisher.py | 13 -- src/ell/api/pubsub/__init__.py | 0 src/ell/api/pubsub/abc.py | 36 +++++ .../mqtt_pubsub.py => api/pubsub/mqtt.py} | 19 ++- .../{util/pubsub.py => api/pubsub/topic.py} | 0 .../pubsub.py => api/pubsub/websocket.py} | 38 +----- src/ell/api/server.py | 128 ++++++++++-------- src/ell/studio/server.py | 6 +- tests/api/test_api.py | 6 +- 10 files changed, 125 insertions(+), 134 deletions(-) delete mode 100644 src/ell/api/mqtt_publisher.py delete mode 100644 src/ell/api/publisher.py create mode 100644 src/ell/api/pubsub/__init__.py create mode 100644 src/ell/api/pubsub/abc.py rename src/ell/{studio/mqtt_pubsub.py => api/pubsub/mqtt.py} (83%) rename src/ell/{util/pubsub.py => api/pubsub/topic.py} (100%) rename src/ell/{studio/pubsub.py => api/pubsub/websocket.py} (80%) diff --git a/src/ell/api/mqtt_publisher.py b/src/ell/api/mqtt_publisher.py deleted file mode 100644 index 09681923f..000000000 --- a/src/ell/api/mqtt_publisher.py +++ /dev/null @@ -1,13 +0,0 @@ -# nb this is to keep aiomqtt optional and out of everything else -import aiomqtt - -from ell.api.publisher import Publisher - - -class MqttPub(Publisher): - def __init__(self, conn: aiomqtt.Client): - self.mqtt_client = conn 
- - async def publish(self, topic: str, message: str) -> None: - await self.mqtt_client.publish(topic, message) - diff --git a/src/ell/api/publisher.py b/src/ell/api/publisher.py deleted file mode 100644 index 09795c542..000000000 --- a/src/ell/api/publisher.py +++ /dev/null @@ -1,13 +0,0 @@ -#todo. under api-server / api.server ... maybe? -from abc import ABC, abstractmethod - - -class Publisher(ABC): - @abstractmethod - async def publish(self, topic: str, message: str) -> None: - pass - - -class NoopPublisher(Publisher): - async def publish(self, topic: str, message: str) -> None: - pass \ No newline at end of file diff --git a/src/ell/api/pubsub/__init__.py b/src/ell/api/pubsub/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ell/api/pubsub/abc.py b/src/ell/api/pubsub/abc.py new file mode 100644 index 000000000..04551ad73 --- /dev/null +++ b/src/ell/api/pubsub/abc.py @@ -0,0 +1,36 @@ +from abc import ABC, abstractmethod +import logging +from typing import List + +from fastapi import WebSocket + +logger = logging.getLogger(__name__) + +Subscriber = WebSocket + + +class PubSub(ABC): + @abstractmethod + async def publish(self, topic: str, message: str) -> None: + pass + + @abstractmethod + def subscribe(self, topic: str, subscriber: Subscriber) -> None: + pass + + @abstractmethod + async def subscribe_async(self, topic: str, subscriber: Subscriber) -> None: + pass + + @abstractmethod + def unsubscribe(self, topic: str, subscriber: Subscriber): + pass + + @abstractmethod + def unsubscribe_from_all(self, subscriber: Subscriber): + pass + + + @abstractmethod + def get_subscriptions(self, subscriber: Subscriber) -> List[str]: + pass diff --git a/src/ell/studio/mqtt_pubsub.py b/src/ell/api/pubsub/mqtt.py similarity index 83% rename from src/ell/studio/mqtt_pubsub.py rename to src/ell/api/pubsub/mqtt.py index 6450f6336..215bf0f3a 100644 --- a/src/ell/studio/mqtt_pubsub.py +++ b/src/ell/api/pubsub/mqtt.py @@ -4,7 +4,8 @@ import aiomqtt -from 
ell.studio.pubsub import WebSocketPubSub, Subscriber +from ell.api.pubsub.abc import Subscriber +from ell.api.pubsub.websocket import WebSocketPubSub logger = logging.getLogger(__name__) @@ -60,11 +61,15 @@ async def subscribe_async(self, topic: str, subscriber: Subscriber) -> None: super().subscribe(topic, subscriber) -async def setup(mqtt_connection_string: str) -> tuple[MqttWebSocketPubSub, aiomqtt.Client]: # type: ignore - """Setup MQTT PubSub with retry logic.""" - retry_interval_seconds = 1 - retry_max_attempts = 5 - +async def setup( + mqtt_connection_string: str, + retry_interval_seconds: int = 1, + retry_max_attempts: int = 5 +) -> tuple[MqttWebSocketPubSub, aiomqtt.Client]: # type: ignore + """ + Connect to the MQTT broker at `mqtt_connection_string` using the provided retry policy. + Returns the client and the open connection which should be handled by an AsyncExitStack or similar. + """ for attempt in range(retry_max_attempts): try: host, port = mqtt_connection_string.split("://")[1].split(":") @@ -72,6 +77,8 @@ async def setup(mqtt_connection_string: str) -> tuple[MqttWebSocketPubSub, aiomq # Create the client - it will connect when used as context manager mqtt_client = aiomqtt.Client(hostname=host, port=int(port) if port else 1883) + # We call __aenter__ here in order to connect and retry on failure + # The client is passed back and must be handled with __aclose__() await mqtt_client.__aenter__() return MqttWebSocketPubSub(mqtt_client), mqtt_client diff --git a/src/ell/util/pubsub.py b/src/ell/api/pubsub/topic.py similarity index 100% rename from src/ell/util/pubsub.py rename to src/ell/api/pubsub/topic.py diff --git a/src/ell/studio/pubsub.py b/src/ell/api/pubsub/websocket.py similarity index 80% rename from src/ell/studio/pubsub.py rename to src/ell/api/pubsub/websocket.py index 1f00225c4..070511d44 100644 --- a/src/ell/studio/pubsub.py +++ b/src/ell/api/pubsub/websocket.py @@ -1,42 +1,8 @@ -from abc import ABC, abstractmethod import asyncio -import 
logging from typing import Any, List -from fastapi import WebSocket - -from ell.util.pubsub import topic_matches, validate_publish_topic, validate_subscription_pattern - -logger = logging.getLogger(__name__) - -Subscriber = WebSocket - - -class PubSub(ABC): - @abstractmethod - async def publish(self, topic: str, message: str) -> None: - pass - - @abstractmethod - def subscribe(self, topic: str, subscriber: Subscriber) -> None: - pass - - @abstractmethod - async def subscribe_async(self, topic: str, subscriber: Subscriber) -> None: - pass - - @abstractmethod - def unsubscribe(self, topic: str, subscriber: Subscriber): - pass - - @abstractmethod - def unsubscribe_from_all(self, subscriber: Subscriber): - pass - - - @abstractmethod - def get_subscriptions(self, subscriber: Subscriber) -> List[str]: - pass +from ell.api.pubsub.abc import PubSub, Subscriber, logger +from ell.api.pubsub.topic import validate_publish_topic, topic_matches, validate_subscription_pattern class WebSocketPubSub(PubSub): diff --git a/src/ell/api/server.py b/src/ell/api/server.py index d6fa67f75..7e449bc0f 100644 --- a/src/ell/api/server.py +++ b/src/ell/api/server.py @@ -1,26 +1,43 @@ # todo. under ell.api.server.___main___ import asyncio -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager, AsyncExitStack import json import logging from typing import List, Optional -# fixme. 
get this out of here -import aiomqtt from fastapi import Depends, FastAPI, HTTPException -from ell.api.client import EllClient, EllPostgresClient, EllSqliteClient +from ell.api.client import EllClient from ell.api.config import Config -from ell.api.publisher import NoopPublisher, Publisher +from ell.api.pubsub.abc import PubSub from ell.types.serialize import GetLMPResponse, LMPInvokedEvent, WriteInvocationInput, WriteLMPInput, LMP logger = logging.getLogger(__name__) -publisher: Optional[Publisher] = None +pubsub: Optional[PubSub] = None -async def get_publisher(): - yield publisher +async def get_pubsub(): + yield pubsub + +async def init_pubsub(config: Config, exit_stack: AsyncExitStack): + """Set up the appropriate pubsub client based on configuration.""" + if config.mqtt_connection_string is not None: + try: + from ell.api.pubsub.mqtt import setup + except ImportError as e: + raise ImportError( + "Received mqtt_connection_string but dependencies missing. Install with `pip install -U ell-ai[mqtt]. 
More info: https://docs.ell.so/installation") from e + + pubsub, mqtt_client = await setup(config.mqtt_connection_string) + + exit_stack.push_async_exit(mqtt_client) + + loop = asyncio.get_event_loop() + return pubsub, pubsub.listen(loop) + + return None, None + serializer: Optional[EllClient] = None @@ -57,50 +74,38 @@ def get_serializer(): return serializer -# def get_session(): -# if serializer is None: -# raise ValueError("Serializer not initialized") -# with Session(serializer.engine) as session: -# yield session - - def create_app(config: Config): # setup_logging(config.log_level) @asynccontextmanager async def lifespan(app: FastAPI): global serializer - global publisher + global pubsub + exit_stack = AsyncExitStack() + pubsub_task = None logger.info("Starting lifespan") serializer = init_serializer(config) - if config.mqtt_connection_string is not None: - try: - from ell.api.mqtt_publisher import MqttPub - except ImportError: - raise ImportError("Missing MQTT dependencies. Install them with `pip install -U ell-ai[mqtt]") - - # fixme. 
have the class do all of this if possible - host, port = config.mqtt_connection_string.split("://")[1].split(":") - - logger.info(f"Connecting to MQTT broker at {host}:{port}") - try: - async with aiomqtt.Client(host, int(port) if port else 1883) as mqtt: - logger.info("Connected to MQTT") - publisher = MqttPub(mqtt) - yield # Allow the app to run - except aiomqtt.MqttError as e: - logger.error(f"Failed to connect to MQTT", exc_info=e) - publisher = None - else: - publisher = NoopPublisher() - yield # allow the app to run + try: + pubsub, pubsub_task = await init_pubsub(config, exit_stack) + yield + + finally: + if pubsub_task and not pubsub_task.done(): + pubsub_task.cancel() + try: + await pubsub_task + except asyncio.CancelledError: + pass + + await exit_stack.aclose() + pubsub = None app = FastAPI( title="ELL API", - description="API server for ELL", + description="Ell API Server", version="0.1.0", lifespan=lifespan ) @@ -127,44 +132,47 @@ async def write_lmp( lmp: WriteLMPInput, # fixme. what is this type supposed to be? 
uses: List[str], # SerializedLMPUses, - publisher: Publisher = Depends(get_publisher), + pubsub: PubSub = Depends(get_pubsub), serializer: EllClient = Depends(get_serializer) ): await serializer.write_lmp(lmp, uses) - loop = asyncio.get_event_loop() - loop.create_task( - publisher.publish( - f"lmp/{lmp.lmp_id}/created", - json.dumps({ - "lmp": lmp.model_dump(), - "uses": uses - }, default=str) + if pubsub: + loop = asyncio.get_event_loop() + loop.create_task( + pubsub.publish( + f"lmp/{lmp.lmp_id}/created", + json.dumps({ + "lmp": lmp.model_dump(), + "uses": uses + }, default=str) + ) ) - ) @app.post("/invocation", response_model=WriteInvocationInput) async def write_invocation( input: WriteInvocationInput, - publisher: Publisher = Depends(get_publisher), + pubsub: PubSub = Depends(get_pubsub), serializer: EllClient = Depends(get_serializer) ): logger.info(f"Writing invocation {input.invocation.lmp_id}") # TODO: return anything this might create like invocation id result = await serializer.write_invocation(input) - loop = asyncio.get_event_loop() - loop.create_task( - publisher.publish( - f"lmp/{input.invocation.lmp_id}/invoked", - LMPInvokedEvent( - lmp_id=input.invocation.lmp_id, - # invocation_id=invo.id, - # todo. return data from write invocation - consumes=[] - ).model_dump_json() + if pubsub: + loop = asyncio.get_event_loop() + loop.create_task( + pubsub.publish( + f"lmp/{input.invocation.lmp_id}/invoked", + LMPInvokedEvent( + lmp_id=input.invocation.lmp_id, + # invocation_id=invo.id, + # todo. 
return data from write invocation + consumes=[] + ).model_dump_json() + ) ) - ) + return input return app diff --git a/src/ell/studio/server.py b/src/ell/studio/server.py index b9331dda4..4c8767f6a 100644 --- a/src/ell/studio/server.py +++ b/src/ell/studio/server.py @@ -10,7 +10,6 @@ import logging import json from ell.studio.config import Config -from ell.studio.connection_manager import ConnectionManager from ell.studio.datamodels import InvocationPublicWithConsumes, SerializedLMPWithUses from ell.stores.studio import SerializedLMP @@ -19,7 +18,8 @@ from contextlib import AsyncExitStack -from ell.studio.pubsub import WebSocketPubSub, PubSub +from ell.api.pubsub.abc import PubSub +from ell.api.pubsub.websocket import WebSocketPubSub logger = logging.getLogger(__name__) @@ -49,7 +49,7 @@ async def setup_pubsub(config: Config, exit_stack: AsyncExitStack): if config.mqtt_connection_string is not None: try: - from ell.studio.mqtt_pubsub import setup + from ell.api.pubsub.mqtt import setup except ImportError as e: raise ImportError( "Received mqtt_connection_string but dependencies missing. Install with `pip install -U ell-ai[mqtt]. 
More info: https://docs.ell.so/installation") from e diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 824d03d2d..2482b8524 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -6,7 +6,7 @@ from fastapi.testclient import TestClient from ell.api.client import EllSqliteClient -from ell.api.server import NoopPublisher, create_app, get_publisher, get_serializer +from ell.api.server import create_app, get_pubsub, get_serializer from ell.api.config import Config from ell.api.logger import setup_logging from ell.types.serialize import utc_now @@ -92,7 +92,7 @@ def create_test_app(sql_store: EllSqliteClient): config = Config(storage_dir=":memory:") app = create_app(config) - publisher = NoopPublisher() + publisher = None async def get_publisher_override(): yield publisher @@ -101,7 +101,7 @@ async def get_publisher_override(): def get_serializer_override(): return sql_store - app.dependency_overrides[get_publisher] = get_publisher_override + app.dependency_overrides[get_pubsub] = get_publisher_override app.dependency_overrides[get_serializer] = get_serializer_override client = TestClient(app) From 0e26714bf76a6c8492667ebe7127b60c4d5967a0 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 3 Nov 2024 11:04:25 -0800 Subject: [PATCH 18/40] add api.client module --- src/ell/api/client.py | 193 --------------------------------- src/ell/api/client/__init__.py | 0 src/ell/api/client/abc.py | 29 +++++ src/ell/api/client/http.py | 73 +++++++++++++ src/ell/api/client/postgres.py | 54 +++++++++ src/ell/api/client/sqlite.py | 54 +++++++++ src/ell/studio/server.py | 1 - 7 files changed, 210 insertions(+), 194 deletions(-) delete mode 100644 src/ell/api/client.py create mode 100644 src/ell/api/client/__init__.py create mode 100644 src/ell/api/client/abc.py create mode 100644 src/ell/api/client/http.py create mode 100644 src/ell/api/client/postgres.py create mode 100644 src/ell/api/client/sqlite.py diff --git a/src/ell/api/client.py b/src/ell/api/client.py 
deleted file mode 100644 index 27c1706b6..000000000 --- a/src/ell/api/client.py +++ /dev/null @@ -1,193 +0,0 @@ -#todo. separate http serializer from sql and all of this from the server -import httpx -from typing import Any, Dict, Optional, Protocol, List -# todo. check this does not cause circularity -from ell.types.serialize import LMP, GetLMPResponse, WriteLMPInput, WriteInvocationInput -from ell.stores.sql import SQLiteStore, PostgresStore -# todo. we should convert the other way and keep store and its dependencies separate from this module -from ell.stores.studio import SerializedLMP, Invocation -import logging -from httpx import HTTPStatusError - - -class EllClient(Protocol): - async def get_lmp(self, lmp_id: str) -> GetLMPResponse: - ... - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - ... - - async def write_invocation(self, input: WriteInvocationInput) -> None: - ... - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - ... - - async def retrieve_blob(self, blob_id: str) -> bytes: - ... - - async def close(self): - ... - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - ... - - -# todo. ell http client / api server client? 
-class EllAPIClient(EllClient): - def __init__(self, base_url: str): - self.base_url = base_url - self.client = httpx.AsyncClient(base_url=base_url) - - async def get_lmp(self, lmp_id: str) -> GetLMPResponse: - response = await self.client.get(f"/lmp/{lmp_id}") - response.raise_for_status() - data = response.json() - if data is None: - return None - return LMP(**data) - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - try: - response = await self.client.post("/lmp", json={ - "lmp": lmp.model_dump(mode="json"), - "uses": uses - }) - response.raise_for_status() - except HTTPStatusError as e: - if e.response.status_code == 422: - error_detail = e.response.json().get("detail", "No detailed error message provided") - logging.error(f"Unprocessable Entity (422) Error: {error_detail}") - raise ValueError(f"Invalid input: {error_detail}") from e - raise - - async def write_invocation(self, input: WriteInvocationInput) -> None: - response = await self.client.post( - "/invocation", - json=input.model_dump(mode="json") - ) - response.raise_for_status() - return None - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - response = await self.client.post("/blob", data={ - "blob": blob, - "metadata": metadata - }) - response.raise_for_status() - return response.json()["blob_id"] - - async def retrieve_blob(self, blob_id: str) -> bytes: - response = await self.client.get(f"/blob/{blob_id}") - response.raise_for_status() - return response.content - - async def close(self): - await self.client.aclose() - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - response = await self.client.get("/lmp/versions", params={"fqn": fqn}) - response.raise_for_status() - data = response.json() - return [LMP(**lmp_data) for lmp_data in data] - - -class EllSqliteClient(EllClient): - def __init__(self, storage_dir: str): - 
self.store = SQLiteStore(storage_dir) - - async def get_lmp(self, lmp_id: str): - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - serialized_lmp = SerializedLMP(**lmp.model_dump()) - self.store.write_lmp(serialized_lmp, uses) - - async def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) - self.store.write_invocation( - invocation, - set(input.consumes) - ) - return None - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.store_blob(blob, metadata) - - async def retrieve_blob(self, blob_id: str) -> bytes: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.retrieve_blob(blob_id) - - async def close(self): - # SQLiteStore doesn't have a close method, so this is a no-op - pass - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() - - -# Nb: these are async clients. maybe we want separate sync ones? 
-class EllPostgresClient(EllClient): - def __init__(self, db_uri: str): - self.store = PostgresStore(db_uri) - - async def get_lmp(self, lmp_id: str): - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - model = SerializedLMP.from_api(lmp) - self.store.write_lmp(model, uses) - - async def write_invocation(self, input: WriteInvocationInput) -> None: - invocation, consumes = input.to_serialized_invocation_input() - self.store.write_invocation( - invocation, - set(consumes) - ) - return None - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.store_blob(blob, metadata) - - async def retrieve_blob(self, blob_id: str) -> bytes: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.retrieve_blob(blob_id) - - async def close(self): - # todo. Do we have a close method? - pass - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() diff --git a/src/ell/api/client/__init__.py b/src/ell/api/client/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ell/api/client/abc.py b/src/ell/api/client/abc.py new file mode 100644 index 000000000..b8abdda2d --- /dev/null +++ b/src/ell/api/client/abc.py @@ -0,0 +1,29 @@ +from typing import Any, Dict, Optional, Protocol, List +# todo. check this does not cause circularity +from ell.types.serialize import LMP, GetLMPResponse, WriteLMPInput, WriteInvocationInput + + +class EllClient(Protocol): + async def get_lmp(self, lmp_id: str) -> GetLMPResponse: + ... 
+ + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + ... + + async def write_invocation(self, input: WriteInvocationInput) -> None: + ... + + async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + ... + + async def retrieve_blob(self, blob_id: str) -> bytes: + ... + + async def close(self): + ... + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + ... + + + diff --git a/src/ell/api/client/http.py b/src/ell/api/client/http.py new file mode 100644 index 000000000..08f977efd --- /dev/null +++ b/src/ell/api/client/http.py @@ -0,0 +1,73 @@ +import logging +from typing import List, Optional, Dict, Any + +import httpx +from httpx import HTTPStatusError + +from ell.api.client.abc import EllClient +from ell.types.serialize import GetLMPResponse, WriteLMPInput, LMP, WriteInvocationInput + + +class EllAPIClient(EllClient): + def __init__(self, base_url: str): + self.base_url = base_url + self.client = httpx.AsyncClient(base_url=base_url) + + async def get_lmp(self, lmp_id: str) -> GetLMPResponse: + response = await self.client.get(f"/lmp/{lmp_id}") + response.raise_for_status() + data = response.json() + if data is None: + return None + return LMP(**data) + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + try: + response = await self.client.post("/lmp", json={ + "lmp": lmp.model_dump(mode="json"), + "uses": uses + }) + response.raise_for_status() + except HTTPStatusError as e: + if e.response.status_code == 422: + error_detail = e.response.json().get("detail", "No detailed error message provided") + logging.error(f"Unprocessable Entity (422) Error: {error_detail}") + raise ValueError(f"Invalid input: {error_detail}") from e + raise + + async def write_invocation(self, input: WriteInvocationInput) -> None: + response = await self.client.post( + "/invocation", + json=input.model_dump(mode="json") + ) + response.raise_for_status() + return None + + async def store_blob(self, 
blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + response = await self.client.post("/blob", data={ + "blob": blob, + "metadata": metadata + }) + response.raise_for_status() + return response.json()["blob_id"] + + async def retrieve_blob(self, blob_id: str) -> bytes: + response = await self.client.get(f"/blob/{blob_id}") + response.raise_for_status() + return response.content + + async def close(self): + await self.client.aclose() + + async def __aenter__(self): + return self + + async def __aexit__(self): + await self.close() + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + response = await self.client.get("/lmp/versions", params={"fqn": fqn}) + response.raise_for_status() + data = response.json() + return [LMP(**lmp_data) for lmp_data in data] + diff --git a/src/ell/api/client/postgres.py b/src/ell/api/client/postgres.py new file mode 100644 index 000000000..70146b071 --- /dev/null +++ b/src/ell/api/client/postgres.py @@ -0,0 +1,54 @@ +from typing import List, Optional, Dict, Any + +from ell.api.client.abc import EllClient +from ell.stores.sql import PostgresStore +from ell.stores.studio import SerializedLMP +from ell.types.serialize import LMP, WriteLMPInput, WriteInvocationInput + + +# Nb: these are async clients. maybe we want separate sync ones? 
+class EllPostgresClient(EllClient): + def __init__(self, db_uri: str): + self.store = PostgresStore(db_uri) + + async def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + model = SerializedLMP.from_api(lmp) + self.store.write_lmp(model, uses) + + async def write_invocation(self, input: WriteInvocationInput) -> None: + invocation, consumes = input.to_serialized_invocation_input() + self.store.write_invocation( + invocation, + set(consumes) + ) + return None + + async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.store_blob(blob, metadata) + + async def retrieve_blob(self, blob_id: str) -> bytes: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.retrieve_blob(blob_id) + + async def close(self): + # todo. Do we have a close method? 
+ pass + + async def __aenter__(self): + return self + + async def __aexit__(self): + await self.close() diff --git a/src/ell/api/client/sqlite.py b/src/ell/api/client/sqlite.py new file mode 100644 index 000000000..ed1393acb --- /dev/null +++ b/src/ell/api/client/sqlite.py @@ -0,0 +1,54 @@ +from typing import List, Optional, Dict, Any + +from ell.api.client.abc import EllClient +from ell.stores.sql import SQLiteStore +from ell.stores.studio import SerializedLMP, Invocation +from ell.types.serialize import WriteLMPInput, WriteInvocationInput, LMP + + +class EllSqliteClient(EllClient): + def __init__(self, storage_dir: str): + self.store = SQLiteStore(storage_dir) + + async def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + serialized_lmp = SerializedLMP(**lmp.model_dump()) + self.store.write_lmp(serialized_lmp, uses) + + async def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + self.store.write_invocation( + invocation, + set(input.consumes) + ) + return None + + async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.store_blob(blob, metadata) + + async def retrieve_blob(self, blob_id: str) -> bytes: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.retrieve_blob(blob_id) + + async def close(self): + # SQLiteStore doesn't have a close method, so this is a no-op + pass + + async def __aenter__(self): + return self + + async def __aexit__(self): + await self.close() + diff --git 
a/src/ell/studio/server.py b/src/ell/studio/server.py index 4c8767f6a..725773ad5 100644 --- a/src/ell/studio/server.py +++ b/src/ell/studio/server.py @@ -55,7 +55,6 @@ async def setup_pubsub(config: Config, exit_stack: AsyncExitStack): "Received mqtt_connection_string but dependencies missing. Install with `pip install -U ell-ai[mqtt]. More info: https://docs.ell.so/installation") from e pubsub, mqtt_client = await setup(config.mqtt_connection_string) - # await exit_stack.enter_async_context(mqtt_client) exit_stack.push_async_exit(mqtt_client) logger.info("Connected to MQTT") From 523edb2abbdaa5f201378ea57be89e5422f74bdf Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Tue, 5 Nov 2024 07:33:54 -0800 Subject: [PATCH 19/40] fix imports --- src/ell/api/server.py | 8 ++++---- tests/api/test_api.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ell/api/server.py b/src/ell/api/server.py index 7e449bc0f..d31720edd 100644 --- a/src/ell/api/server.py +++ b/src/ell/api/server.py @@ -7,7 +7,7 @@ from fastapi import Depends, FastAPI, HTTPException -from ell.api.client import EllClient +from ell.api.client.abc import EllClient from ell.api.config import Config from ell.api.pubsub.abc import PubSub from ell.types.serialize import GetLMPResponse, LMPInvokedEvent, WriteInvocationInput, WriteLMPInput, LMP @@ -49,7 +49,7 @@ def init_serializer(config: Config) -> EllClient: return serializer elif config.pg_connection_string: try: - from ell.api.client import EllPostgresClient + from ell.api.client.postgres import EllPostgresClient return EllPostgresClient(config.pg_connection_string) except ImportError: # todo. centralize this in util or something, we have it everywhere @@ -57,8 +57,8 @@ def init_serializer(config: Config) -> EllClient: "Postgres storage is not enabled. Enable it with `pip install -U ell-api[postgres]`. 
More info: https://docs.ell.so/installation") elif config.storage_dir: try: - from ell.api.client import EllSqliteClient - return EllSqliteClient(config.pg_connection_string) + from ell.api.client.sqlite import EllSqliteClient + return EllSqliteClient(config.storage_dir) except ImportError: raise ImportError( "SQLite storage is not enabled. Enable it with `pip install -U ell-api[sqlite]`. More info: https://docs.ell.so/installation" diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 2482b8524..fc04dd924 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -5,7 +5,7 @@ from typing import Any, Dict from fastapi.testclient import TestClient -from ell.api.client import EllSqliteClient +from ell.api.client.sqlite import EllSqliteClient from ell.api.server import create_app, get_pubsub, get_serializer from ell.api.config import Config from ell.api.logger import setup_logging From cd0fcf897bb39d991eee49233f437805354aac61 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Fri, 8 Nov 2024 07:49:12 -0800 Subject: [PATCH 20/40] centralize missing extras error --- src/ell/api/server.py | 17 ++++++++++------- src/ell/configurator.py | 9 +++++++-- src/ell/util/errors.py | 6 ++++++ 3 files changed, 23 insertions(+), 9 deletions(-) create mode 100644 src/ell/util/errors.py diff --git a/src/ell/api/server.py b/src/ell/api/server.py index d31720edd..c74a106d7 100644 --- a/src/ell/api/server.py +++ b/src/ell/api/server.py @@ -11,6 +11,7 @@ from ell.api.config import Config from ell.api.pubsub.abc import PubSub from ell.types.serialize import GetLMPResponse, LMPInvokedEvent, WriteInvocationInput, WriteLMPInput, LMP +from ell.util.errors import missing_ell_extras logger = logging.getLogger(__name__) @@ -26,8 +27,10 @@ async def init_pubsub(config: Config, exit_stack: AsyncExitStack): try: from ell.api.pubsub.mqtt import setup except ImportError as e: - raise ImportError( - "Received mqtt_connection_string but dependencies missing. 
Install with `pip install -U ell-ai[mqtt]. More info: https://docs.ell.so/installation") from e + raise missing_ell_extras( + message="Received mqtt_connection_string but dependencies missing.", + extras=["mqtt"] + ) from e pubsub, mqtt_client = await setup(config.mqtt_connection_string) @@ -52,16 +55,16 @@ def init_serializer(config: Config) -> EllClient: from ell.api.client.postgres import EllPostgresClient return EllPostgresClient(config.pg_connection_string) except ImportError: - # todo. centralize this in util or something, we have it everywhere - raise ImportError( - "Postgres storage is not enabled. Enable it with `pip install -U ell-api[postgres]`. More info: https://docs.ell.so/installation") + raise missing_ell_extras( + message="Postgres storage is not enabled.", extras=["postgres"] + ) elif config.storage_dir: try: from ell.api.client.sqlite import EllSqliteClient return EllSqliteClient(config.storage_dir) except ImportError: - raise ImportError( - "SQLite storage is not enabled. Enable it with `pip install -U ell-api[sqlite]`. More info: https://docs.ell.so/installation" + raise missing_ell_extras( + message="SQLite storage is not enabled.", extras=["sqlite"] ) else: diff --git a/src/ell/configurator.py b/src/ell/configurator.py index f0a2d463b..ae8aa8c68 100644 --- a/src/ell/configurator.py +++ b/src/ell/configurator.py @@ -8,8 +8,10 @@ from ell.provider import Provider from dataclasses import dataclass, field +from ell.util.errors import missing_ell_extras + if TYPE_CHECKING: - from ell.stores import Store + from ell.stores.store import Store else: Store = None @@ -184,7 +186,10 @@ def init( from ell.stores.sql import SQLiteStore config.store = SQLiteStore(store) except ImportError: - raise ImportError("Failed importing SQLiteStore. Install with `pip install -U ell-ai[all]`. 
More info: https://docs.ell.so/installation") + raise missing_ell_extras( + message="Failed importing SQLiteStore", + extras=["all"] + ) else: config.store = store config.autocommit = autocommit or config.autocommit diff --git a/src/ell/util/errors.py b/src/ell/util/errors.py new file mode 100644 index 000000000..3805df15d --- /dev/null +++ b/src/ell/util/errors.py @@ -0,0 +1,6 @@ +from typing import List + +def missing_ell_extras(message: str, extras: List[str]): + return ImportError( + f"{message}. Enable them with `pip install -U ell-api[{','.join(extras)}]`. More info: https://docs.ell.so/installation" + ) \ No newline at end of file From 7f82ee870a0fa071688763822bb296fb4acf117b Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 10 Nov 2024 06:52:18 -0800 Subject: [PATCH 21/40] add serialize module, minio blob store --- docker/docker-compose.yml | 51 +- examples/future/images_minio.py | 38 ++ poetry.lock | 609 ++++++++++++------ pyproject.toml | 2 + src/ell/api/__main__.py | 54 +- src/ell/api/client/abc.py | 29 - src/ell/api/client/http.py | 73 --- src/ell/api/client/postgres.py | 54 -- src/ell/api/client/sqlite.py | 54 -- src/ell/api/config.py | 21 +- src/ell/api/server.py | 60 +- src/ell/{api/client => serialize}/__init__.py | 0 src/ell/serialize/client.py | 73 +++ src/ell/serialize/config.py | 37 ++ src/ell/serialize/http.py | 134 ++++ src/ell/serialize/postgres.py | 95 +++ src/ell/serialize/protocol.py | 53 ++ src/ell/serialize/sqlite.py | 86 +++ src/ell/stores/__init__.py | 2 + src/ell/stores/minio.py | 55 ++ src/ell/stores/sql.py | 12 +- src/ell/stores/store.py | 14 +- src/ell/studio/__main__.py | 52 +- src/ell/studio/config.py | 23 +- src/ell/studio/server.py | 9 +- src/ell/types/serialize.py | 36 +- tests/api/test_api.py | 12 +- 27 files changed, 1220 insertions(+), 518 deletions(-) create mode 100644 examples/future/images_minio.py delete mode 100644 src/ell/api/client/abc.py delete mode 100644 src/ell/api/client/http.py delete mode 100644 
src/ell/api/client/postgres.py delete mode 100644 src/ell/api/client/sqlite.py rename src/ell/{api/client => serialize}/__init__.py (100%) create mode 100644 src/ell/serialize/client.py create mode 100644 src/ell/serialize/config.py create mode 100644 src/ell/serialize/http.py create mode 100644 src/ell/serialize/postgres.py create mode 100644 src/ell/serialize/protocol.py create mode 100644 src/ell/serialize/sqlite.py create mode 100644 src/ell/stores/minio.py diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 1664aa7e0..def2b9d77 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -6,40 +6,50 @@ services: context: .. dockerfile: docker/Dockerfile.api args: - ELL_EXTRAS: postgres mqtt + ELL_EXTRAS: postgres mqtt minio tags: - ell-api ports: - "8081:8081" environment: - - HOST=0.0.0.0 - - PORT=8081 + - ELL_API_HOST=0.0.0.0 + - ELL_API_PORT=8081 - ELL_PG_CONNECTION_STRING=postgresql://ell_user:ell_password@postgres:5432/ell_db - ELL_MQTT_CONNECTION_STRING=mqtt://mqtt:1883 - - LOG_LEVEL=DEBUG + - LOG_LEVEL=10 # debug + - ELL_MINIO_ENDPOINT=minio:9000 + - ELL_MINIO_ACCESS_KEY=minio_user + - ELL_MINIO_SECRET_KEY=minio_password + - ELL_MINIO_BUCKET=ell-bucket depends_on: - postgres - mqtt + - minio studio: build: context: .. 
dockerfile: docker/Dockerfile.studio args: - ELL_EXTRAS: postgres mqtt + ELL_EXTRAS: postgres mqtt minio tags: - ell-studio ports: - "8080:8080" environment: - - HOST=0.0.0.0 - - PORT=8080 # currently doesn't take effect -- cli defaults it + - ELL_STUDIO_HOST=0.0.0.0 + - ELL_STUDIO_PORT=8080 - ELL_PG_CONNECTION_STRING=postgresql://ell_user:ell_password@postgres:5432/ell_db - ELL_MQTT_CONNECTION_STRING=mqtt://mqtt:1883 + - ELL_MINIO_ENDPOINT=minio:9000 + - ELL_MINIO_ACCESS_KEY=minio_user + - ELL_MINIO_SECRET_KEY=minio_password + - ELL_MINIO_BUCKET=ell-bucket depends_on: - postgres - mqtt + - minio develop: watch: - action: sync+restart @@ -75,6 +85,31 @@ services: ports: - "5432:5432" + minio: + image: minio/minio:latest + ports: + - "9000:9000" # API port + - "9001:9001" # Console port + environment: + - MINIO_ROOT_USER=minio_user + - MINIO_ROOT_PASSWORD=minio_password + volumes: + - minio_data:/data + command: server --console-address ":9001" --address ":9000" /data + + minio-init: + image: minio/mc + depends_on: + - minio + entrypoint: > + /bin/sh -c " + sleep 5; + /usr/bin/mc alias set myminio http://minio:9000 minio_user minio_password --api S3v4; + /usr/bin/mc mb myminio/ell-bucket; + exit 0; + " + volumes: postgres_data: - mosquitto_config: \ No newline at end of file + mosquitto_config: + minio_data: \ No newline at end of file diff --git a/examples/future/images_minio.py b/examples/future/images_minio.py new file mode 100644 index 000000000..cc0040d60 --- /dev/null +++ b/examples/future/images_minio.py @@ -0,0 +1,38 @@ +from PIL import Image +import os + +import ell +from ell.stores.minio import MinioBlobStore, MinioConfig +from ell.stores.sql import PostgresStore + + +# Load the image using PIL +big_picture = Image.open(os.path.join(os.path.dirname(__file__), "bigpicture.jpg")) + +@ell.simple(model="gpt-4o", temperature=0.5) +def make_a_joke_about_the_image(image: Image.Image): + return [ + ell.system("You are a meme maker. 
You are given an image and you must make a joke about it."), + ell.user(image) + ] + + + +if __name__ == "__main__": + # Run "docker compose up" inside the `docker` folder to run + # ell studio with minio for blob storage with postgres + blob_store = MinioBlobStore( + config=MinioConfig( + endpoint="localhost:9000", + access_key="minio_user", + secret_key="minio_password", + bucket="ell-bucket", + ) + ) + store = PostgresStore( + db_uri="postgresql://ell_user:ell_password@localhost:5432/ell_db", + blob_store=blob_store, + ) + ell.init(store=store, autocommit=True, verbose=True) + joke = make_a_joke_about_the_image(big_picture) + print(joke) \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index d01b066fa..2610f7fab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -84,6 +84,63 @@ doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] trio = ["trio (>=0.26.1)"] +[[package]] +name = "argon2-cffi" +version = "23.1.0" +description = "Argon2 for Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea"}, + {file = "argon2_cffi-23.1.0.tar.gz", hash = "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08"}, +] + +[package.dependencies] +argon2-cffi-bindings = "*" + +[package.extras] +dev = ["argon2-cffi[tests,typing]", "tox (>4)"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-notfound-page"] +tests = ["hypothesis", "pytest"] +typing = ["mypy"] + +[[package]] +name = "argon2-cffi-bindings" +version = "21.2.0" +description = "Low-level CFFI bindings for Argon2" +optional = true +python-versions = ">=3.6" +files = [ + {file = 
"argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_i686.whl", hash = "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win32.whl", hash = "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f"}, + {file = "argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3e385d1c39c520c08b53d63300c3ecc28622f076f4c2b0e6d7e796e9f6502194"}, + {file = 
"argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3e3cc67fdb7d82c4718f19b4e7a87123caf8a93fde7e23cf66ac0337d3cb3f"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a22ad9800121b71099d0fb0a65323810a15f2e292f2ba450810a7316e128ee5"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:93f9bf70084f97245ba10ee36575f0c3f1e7d7724d67d8e5b08e61787c320ed7"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3b9ef65804859d335dc6b31582cad2c5166f0c3e7975f324d9ffaa34ee7e6583"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4966ef5848d820776f5f562a7d45fdd70c2f330c961d0d745b784034bd9f48d"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ef543a89dee4db46a1a6e206cd015360e5a75822f76df533845c3cbaf72670"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a"}, +] + +[package.dependencies] +cffi = ">=1.0.1" + +[package.extras] +dev = ["cogapp", "pre-commit", "pytest", "wheel"] +tests = ["pytest"] + [[package]] name = "attrs" version = "24.2.0" @@ -199,6 +256,85 @@ files = [ {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, ] 
+[[package]] +name = "cffi" +version = "1.17.1" +description = "Foreign Function Interface for Python calling C code." +optional = true +python-versions = ">=3.8" +files = [ + {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, + {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, + {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, + {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, + {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, + {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, + {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, + {file = 
"cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, + {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, + {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, + {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, + {file = 
"cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, + {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, + {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, + {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, + {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "charset-normalizer" version = "3.4.0" @@ -1036,6 +1172,24 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "minio" +version = "7.2.10" +description = "MinIO Python SDK for Amazon S3 Compatible Cloud Storage" +optional = true +python-versions = ">3.8" +files = [ + {file = "minio-7.2.10-py3-none-any.whl", hash = "sha256:5961c58192b1d70d3a2a362064b8e027b8232688998a6d1251dadbb02ab57a7d"}, + {file = "minio-7.2.10.tar.gz", hash = "sha256:418c31ac79346a580df04a0e14db1becbc548a6e7cca61f9bc4ef3bcd336c449"}, +] + +[package.dependencies] +argon2-cffi = "*" +certifi = "*" +pycryptodome = "*" +typing-extensions = "*" +urllib3 = "*" + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -1103,13 +1257,13 @@ files = [ [[package]] name = "openai" -version = "1.53.0" +version = "1.54.3" description = "The official Python library for the openai API" optional = false -python-versions = ">=3.7.1" +python-versions = ">=3.8" files = [ - {file = "openai-1.53.0-py3-none-any.whl", hash = "sha256:20f408c32fc5cb66e60c6882c994cdca580a5648e10045cd840734194f033418"}, - {file = "openai-1.53.0.tar.gz", 
hash = "sha256:be2c4e77721b166cce8130e544178b7d579f751b4b074ffbaade3854b6f85ec5"}, + {file = "openai-1.54.3-py3-none-any.whl", hash = "sha256:f18dbaf09c50d70c4185b892a2a553f80681d1d866323a2da7f7be2f688615d5"}, + {file = "openai-1.54.3.tar.gz", hash = "sha256:7511b74eeb894ac0b0253dc71f087a15d2e4d71d22d0088767205143d880cca6"}, ] [package.dependencies] @@ -1127,13 +1281,13 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -1335,6 +1489,58 @@ files = [ {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, ] +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + +[[package]] +name = "pycryptodome" +version = "3.21.0" +description = "Cryptographic library for Python" +optional = true +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "pycryptodome-3.21.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = 
"sha256:dad9bf36eda068e89059d1f07408e397856be9511d7113ea4b586642a429a4fd"}, + {file = "pycryptodome-3.21.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:a1752eca64c60852f38bb29e2c86fca30d7672c024128ef5d70cc15868fa10f4"}, + {file = "pycryptodome-3.21.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:3ba4cc304eac4d4d458f508d4955a88ba25026890e8abff9b60404f76a62c55e"}, + {file = "pycryptodome-3.21.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cb087b8612c8a1a14cf37dd754685be9a8d9869bed2ffaaceb04850a8aeef7e"}, + {file = "pycryptodome-3.21.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:26412b21df30b2861424a6c6d5b1d8ca8107612a4cfa4d0183e71c5d200fb34a"}, + {file = "pycryptodome-3.21.0-cp27-cp27m-win32.whl", hash = "sha256:cc2269ab4bce40b027b49663d61d816903a4bd90ad88cb99ed561aadb3888dd3"}, + {file = "pycryptodome-3.21.0-cp27-cp27m-win_amd64.whl", hash = "sha256:0fa0a05a6a697ccbf2a12cec3d6d2650b50881899b845fac6e87416f8cb7e87d"}, + {file = "pycryptodome-3.21.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6cce52e196a5f1d6797ff7946cdff2038d3b5f0aba4a43cb6bf46b575fd1b5bb"}, + {file = "pycryptodome-3.21.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:a915597ffccabe902e7090e199a7bf7a381c5506a747d5e9d27ba55197a2c568"}, + {file = "pycryptodome-3.21.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4e74c522d630766b03a836c15bff77cb657c5fdf098abf8b1ada2aebc7d0819"}, + {file = "pycryptodome-3.21.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:a3804675283f4764a02db05f5191eb8fec2bb6ca34d466167fc78a5f05bbe6b3"}, + {file = "pycryptodome-3.21.0-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:2480ec2c72438430da9f601ebc12c518c093c13111a5c1644c82cdfc2e50b1e4"}, + {file = "pycryptodome-3.21.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:de18954104667f565e2fbb4783b56667f30fb49c4d79b346f52a29cb198d5b6b"}, + {file = 
"pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de4b7263a33947ff440412339cb72b28a5a4c769b5c1ca19e33dd6cd1dcec6e"}, + {file = "pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0714206d467fc911042d01ea3a1847c847bc10884cf674c82e12915cfe1649f8"}, + {file = "pycryptodome-3.21.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d85c1b613121ed3dbaa5a97369b3b757909531a959d229406a75b912dd51dd1"}, + {file = "pycryptodome-3.21.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:8898a66425a57bcf15e25fc19c12490b87bd939800f39a03ea2de2aea5e3611a"}, + {file = "pycryptodome-3.21.0-cp36-abi3-musllinux_1_2_i686.whl", hash = "sha256:932c905b71a56474bff8a9c014030bc3c882cee696b448af920399f730a650c2"}, + {file = "pycryptodome-3.21.0-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:18caa8cfbc676eaaf28613637a89980ad2fd96e00c564135bf90bc3f0b34dd93"}, + {file = "pycryptodome-3.21.0-cp36-abi3-win32.whl", hash = "sha256:280b67d20e33bb63171d55b1067f61fbd932e0b1ad976b3a184303a3dad22764"}, + {file = "pycryptodome-3.21.0-cp36-abi3-win_amd64.whl", hash = "sha256:b7aa25fc0baa5b1d95b7633af4f5f1838467f1815442b22487426f94e0d66c53"}, + {file = "pycryptodome-3.21.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:2cb635b67011bc147c257e61ce864879ffe6d03342dc74b6045059dfbdedafca"}, + {file = "pycryptodome-3.21.0-pp27-pypy_73-win32.whl", hash = "sha256:4c26a2f0dc15f81ea3afa3b0c87b87e501f235d332b7f27e2225ecb80c0b1cdd"}, + {file = "pycryptodome-3.21.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d5ebe0763c982f069d3877832254f64974139f4f9655058452603ff559c482e8"}, + {file = "pycryptodome-3.21.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ee86cbde706be13f2dec5a42b52b1c1d1cbb90c8e405c68d0755134735c8dc6"}, + {file = 
"pycryptodome-3.21.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fd54003ec3ce4e0f16c484a10bc5d8b9bd77fa662a12b85779a2d2d85d67ee0"}, + {file = "pycryptodome-3.21.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5dfafca172933506773482b0e18f0cd766fd3920bd03ec85a283df90d8a17bc6"}, + {file = "pycryptodome-3.21.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:590ef0898a4b0a15485b05210b4a1c9de8806d3ad3d47f74ab1dc07c67a6827f"}, + {file = "pycryptodome-3.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f35e442630bc4bc2e1878482d6f59ea22e280d7121d7adeaedba58c23ab6386b"}, + {file = "pycryptodome-3.21.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff99f952db3db2fbe98a0b355175f93ec334ba3d01bbde25ad3a5a33abc02b58"}, + {file = "pycryptodome-3.21.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8acd7d34af70ee63f9a849f957558e49a98f8f1634f86a59d2be62bb8e93f71c"}, + {file = "pycryptodome-3.21.0.tar.gz", hash = "sha256:f7787e0d469bdae763b876174cf2e6c0f7be79808af26b1da96f1a64bcf47297"}, +] + [[package]] name = "pydantic" version = "2.9.2" @@ -1948,111 +2154,123 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7 [[package]] name = "tokenizers" -version = "0.20.1" +version = "0.20.3" description = "" optional = true python-versions = ">=3.7" files = [ - {file = "tokenizers-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:439261da7c0a5c88bda97acb284d49fbdaf67e9d3b623c0bfd107512d22787a9"}, - {file = "tokenizers-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03dae629d99068b1ea5416d50de0fea13008f04129cc79af77a2a6392792d93c"}, - {file = "tokenizers-0.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b61f561f329ffe4b28367798b89d60c4abf3f815d37413b6352bc6412a359867"}, - {file = 
"tokenizers-0.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec870fce1ee5248a10be69f7a8408a234d6f2109f8ea827b4f7ecdbf08c9fd15"}, - {file = "tokenizers-0.20.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d388d1ea8b7447da784e32e3b86a75cce55887e3b22b31c19d0b186b1c677800"}, - {file = "tokenizers-0.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:299c85c1d21135bc01542237979bf25c32efa0d66595dd0069ae259b97fb2dbe"}, - {file = "tokenizers-0.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e96f6c14c9752bb82145636b614d5a78e9cde95edfbe0a85dad0dd5ddd6ec95c"}, - {file = "tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc9e95ad49c932b80abfbfeaf63b155761e695ad9f8a58c52a47d962d76e310f"}, - {file = "tokenizers-0.20.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f22dee205329a636148c325921c73cf3e412e87d31f4d9c3153b302a0200057b"}, - {file = "tokenizers-0.20.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2ffd9a8895575ac636d44500c66dffaef133823b6b25067604fa73bbc5ec09d"}, - {file = "tokenizers-0.20.1-cp310-none-win32.whl", hash = "sha256:2847843c53f445e0f19ea842a4e48b89dd0db4e62ba6e1e47a2749d6ec11f50d"}, - {file = "tokenizers-0.20.1-cp310-none-win_amd64.whl", hash = "sha256:f9aa93eacd865f2798b9e62f7ce4533cfff4f5fbd50c02926a78e81c74e432cd"}, - {file = "tokenizers-0.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4a717dcb08f2dabbf27ae4b6b20cbbb2ad7ed78ce05a829fae100ff4b3c7ff15"}, - {file = "tokenizers-0.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3f84dad1ff1863c648d80628b1b55353d16303431283e4efbb6ab1af56a75832"}, - {file = "tokenizers-0.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:929c8f3afa16a5130a81ab5079c589226273ec618949cce79b46d96e59a84f61"}, - {file = "tokenizers-0.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:d10766473954397e2d370f215ebed1cc46dcf6fd3906a2a116aa1d6219bfedc3"}, - {file = "tokenizers-0.20.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9300fac73ddc7e4b0330acbdda4efaabf74929a4a61e119a32a181f534a11b47"}, - {file = "tokenizers-0.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0ecaf7b0e39caeb1aa6dd6e0975c405716c82c1312b55ac4f716ef563a906969"}, - {file = "tokenizers-0.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5170be9ec942f3d1d317817ced8d749b3e1202670865e4fd465e35d8c259de83"}, - {file = "tokenizers-0.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef3f1ae08fa9aea5891cbd69df29913e11d3841798e0bfb1ff78b78e4e7ea0a4"}, - {file = "tokenizers-0.20.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ee86d4095d3542d73579e953c2e5e07d9321af2ffea6ecc097d16d538a2dea16"}, - {file = "tokenizers-0.20.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:86dcd08da163912e17b27bbaba5efdc71b4fbffb841530fdb74c5707f3c49216"}, - {file = "tokenizers-0.20.1-cp311-none-win32.whl", hash = "sha256:9af2dc4ee97d037bc6b05fa4429ddc87532c706316c5e11ce2f0596dfcfa77af"}, - {file = "tokenizers-0.20.1-cp311-none-win_amd64.whl", hash = "sha256:899152a78b095559c287b4c6d0099469573bb2055347bb8154db106651296f39"}, - {file = "tokenizers-0.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:407ab666b38e02228fa785e81f7cf79ef929f104bcccf68a64525a54a93ceac9"}, - {file = "tokenizers-0.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f13a2d16032ebc8bd812eb8099b035ac65887d8f0c207261472803b9633cf3e"}, - {file = "tokenizers-0.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e98eee4dca22849fbb56a80acaa899eec5b72055d79637dd6aa15d5e4b8628c9"}, - {file = "tokenizers-0.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47c1bcdd61e61136087459cb9e0b069ff23b5568b008265e5cbc927eae3387ce"}, - {file = 
"tokenizers-0.20.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:128c1110e950534426e2274837fc06b118ab5f2fa61c3436e60e0aada0ccfd67"}, - {file = "tokenizers-0.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2e2d47a819d2954f2c1cd0ad51bb58ffac6f53a872d5d82d65d79bf76b9896d"}, - {file = "tokenizers-0.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bdd67a0e3503a9a7cf8bc5a4a49cdde5fa5bada09a51e4c7e1c73900297539bd"}, - {file = "tokenizers-0.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689b93d2e26d04da337ac407acec8b5d081d8d135e3e5066a88edd5bdb5aff89"}, - {file = "tokenizers-0.20.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0c6a796ddcd9a19ad13cf146997cd5895a421fe6aec8fd970d69f9117bddb45c"}, - {file = "tokenizers-0.20.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3ea919687aa7001a8ff1ba36ac64f165c4e89035f57998fa6cedcfd877be619d"}, - {file = "tokenizers-0.20.1-cp312-none-win32.whl", hash = "sha256:6d3ac5c1f48358ffe20086bf065e843c0d0a9fce0d7f0f45d5f2f9fba3609ca5"}, - {file = "tokenizers-0.20.1-cp312-none-win_amd64.whl", hash = "sha256:b0874481aea54a178f2bccc45aa2d0c99cd3f79143a0948af6a9a21dcc49173b"}, - {file = "tokenizers-0.20.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:96af92e833bd44760fb17f23f402e07a66339c1dcbe17d79a9b55bb0cc4f038e"}, - {file = "tokenizers-0.20.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:65f34e5b731a262dfa562820818533c38ce32a45864437f3d9c82f26c139ca7f"}, - {file = "tokenizers-0.20.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17f98fccb5c12ab1ce1f471731a9cd86df5d4bd2cf2880c5a66b229802d96145"}, - {file = "tokenizers-0.20.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b8c0fc3542cf9370bf92c932eb71bdeb33d2d4aeeb4126d9fd567b60bd04cb30"}, - {file = "tokenizers-0.20.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4b39356df4575d37f9b187bb623aab5abb7b62c8cb702867a1768002f814800c"}, - {file = "tokenizers-0.20.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfdad27b0e50544f6b838895a373db6114b85112ba5c0cefadffa78d6daae563"}, - {file = "tokenizers-0.20.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:094663dd0e85ee2e573126918747bdb40044a848fde388efb5b09d57bc74c680"}, - {file = "tokenizers-0.20.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e4cf033a2aa207d7ac790e91adca598b679999710a632c4a494aab0fc3a1b2"}, - {file = "tokenizers-0.20.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9310951c92c9fb91660de0c19a923c432f110dbfad1a2d429fbc44fa956bf64f"}, - {file = "tokenizers-0.20.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:05e41e302c315bd2ed86c02e917bf03a6cf7d2f652c9cee1a0eb0d0f1ca0d32c"}, - {file = "tokenizers-0.20.1-cp37-none-win32.whl", hash = "sha256:212231ab7dfcdc879baf4892ca87c726259fa7c887e1688e3f3cead384d8c305"}, - {file = "tokenizers-0.20.1-cp37-none-win_amd64.whl", hash = "sha256:896195eb9dfdc85c8c052e29947169c1fcbe75a254c4b5792cdbd451587bce85"}, - {file = "tokenizers-0.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:741fb22788482d09d68e73ece1495cfc6d9b29a06c37b3df90564a9cfa688e6d"}, - {file = "tokenizers-0.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10be14ebd8082086a342d969e17fc2d6edc856c59dbdbddd25f158fa40eaf043"}, - {file = "tokenizers-0.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:514cf279b22fa1ae0bc08e143458c74ad3b56cd078b319464959685a35c53d5e"}, - {file = "tokenizers-0.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a647c5b7cb896d6430cf3e01b4e9a2d77f719c84cefcef825d404830c2071da2"}, - {file = "tokenizers-0.20.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cdf379219e1e1dd432091058dab325a2e6235ebb23e0aec8d0508567c90cd01"}, - {file = 
"tokenizers-0.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ba72260449e16c4c2f6f3252823b059fbf2d31b32617e582003f2b18b415c39"}, - {file = "tokenizers-0.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:910b96ed87316e4277b23c7bcaf667ce849c7cc379a453fa179e7e09290eeb25"}, - {file = "tokenizers-0.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e53975a6694428a0586534cc1354b2408d4e010a3103117f617cbb550299797c"}, - {file = "tokenizers-0.20.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:07c4b7be58da142b0730cc4e5fd66bb7bf6f57f4986ddda73833cd39efef8a01"}, - {file = "tokenizers-0.20.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b605c540753e62199bf15cf69c333e934077ef2350262af2ccada46026f83d1c"}, - {file = "tokenizers-0.20.1-cp38-none-win32.whl", hash = "sha256:88b3bc76ab4db1ab95ead623d49c95205411e26302cf9f74203e762ac7e85685"}, - {file = "tokenizers-0.20.1-cp38-none-win_amd64.whl", hash = "sha256:d412a74cf5b3f68a90c615611a5aa4478bb303d1c65961d22db45001df68afcb"}, - {file = "tokenizers-0.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a25dcb2f41a0a6aac31999e6c96a75e9152fa0127af8ece46c2f784f23b8197a"}, - {file = "tokenizers-0.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a12c3cebb8c92e9c35a23ab10d3852aee522f385c28d0b4fe48c0b7527d59762"}, - {file = "tokenizers-0.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02e18da58cf115b7c40de973609c35bde95856012ba42a41ee919c77935af251"}, - {file = "tokenizers-0.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f326a1ac51ae909b9760e34671c26cd0dfe15662f447302a9d5bb2d872bab8ab"}, - {file = "tokenizers-0.20.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b4872647ea6f25224e2833b044b0b19084e39400e8ead3cfe751238b0802140"}, - {file = "tokenizers-0.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ce6238a3311bb8e4c15b12600927d35c267b92a52c881ef5717a900ca14793f7"}, - {file = "tokenizers-0.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57b7a8880b208866508b06ce365dc631e7a2472a3faa24daa430d046fb56c885"}, - {file = "tokenizers-0.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a908c69c2897a68f412aa05ba38bfa87a02980df70f5a72fa8490479308b1f2d"}, - {file = "tokenizers-0.20.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:da1001aa46f4490099c82e2facc4fbc06a6a32bf7de3918ba798010954b775e0"}, - {file = "tokenizers-0.20.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:42c097390e2f0ed0a5c5d569e6669dd4e9fff7b31c6a5ce6e9c66a61687197de"}, - {file = "tokenizers-0.20.1-cp39-none-win32.whl", hash = "sha256:3d4d218573a3d8b121a1f8c801029d70444ffb6d8f129d4cca1c7b672ee4a24c"}, - {file = "tokenizers-0.20.1-cp39-none-win_amd64.whl", hash = "sha256:37d1e6f616c84fceefa7c6484a01df05caf1e207669121c66213cb5b2911d653"}, - {file = "tokenizers-0.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48689da7a395df41114f516208d6550e3e905e1239cc5ad386686d9358e9cef0"}, - {file = "tokenizers-0.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:712f90ea33f9bd2586b4a90d697c26d56d0a22fd3c91104c5858c4b5b6489a79"}, - {file = "tokenizers-0.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:359eceb6a620c965988fc559cebc0a98db26713758ec4df43fb76d41486a8ed5"}, - {file = "tokenizers-0.20.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d3caf244ce89d24c87545aafc3448be15870096e796c703a0d68547187192e1"}, - {file = "tokenizers-0.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03b03cf8b9a32254b1bf8a305fb95c6daf1baae0c1f93b27f2b08c9759f41dee"}, - {file = "tokenizers-0.20.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:218e5a3561561ea0f0ef1559c6d95b825308dbec23fb55b70b92589e7ff2e1e8"}, - 
{file = "tokenizers-0.20.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f40df5e0294a95131cc5f0e0eb91fe86d88837abfbee46b9b3610b09860195a7"}, - {file = "tokenizers-0.20.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:08aaa0d72bb65058e8c4b0455f61b840b156c557e2aca57627056624c3a93976"}, - {file = "tokenizers-0.20.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:998700177b45f70afeb206ad22c08d9e5f3a80639dae1032bf41e8cbc4dada4b"}, - {file = "tokenizers-0.20.1-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62f7fbd3c2c38b179556d879edae442b45f68312019c3a6013e56c3947a4e648"}, - {file = "tokenizers-0.20.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31e87fca4f6bbf5cc67481b562147fe932f73d5602734de7dd18a8f2eee9c6dd"}, - {file = "tokenizers-0.20.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:956f21d359ae29dd51ca5726d2c9a44ffafa041c623f5aa33749da87cfa809b9"}, - {file = "tokenizers-0.20.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1fbbaf17a393c78d8aedb6a334097c91cb4119a9ced4764ab8cfdc8d254dc9f9"}, - {file = "tokenizers-0.20.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ebe63e31f9c1a970c53866d814e35ec2ec26fda03097c486f82f3891cee60830"}, - {file = "tokenizers-0.20.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:81970b80b8ac126910295f8aab2d7ef962009ea39e0d86d304769493f69aaa1e"}, - {file = "tokenizers-0.20.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130e35e76f9337ed6c31be386e75d4925ea807055acf18ca1a9b0eec03d8fe23"}, - {file = "tokenizers-0.20.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd28a8614f5c82a54ab2463554e84ad79526c5184cf4573bbac2efbbbcead457"}, - {file = "tokenizers-0.20.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9041ee665d0fa7f5c4ccf0f81f5e6b7087f797f85b143c094126fc2611fec9d0"}, - 
{file = "tokenizers-0.20.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:62eb9daea2a2c06bcd8113a5824af8ef8ee7405d3a71123ba4d52c79bb3d9f1a"}, - {file = "tokenizers-0.20.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f861889707b54a9ab1204030b65fd6c22bdd4a95205deec7994dc22a8baa2ea4"}, - {file = "tokenizers-0.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:89d5c337d74ea6e5e7dc8af124cf177be843bbb9ca6e58c01f75ea103c12c8a9"}, - {file = "tokenizers-0.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0b7f515c83397e73292accdbbbedc62264e070bae9682f06061e2ddce67cacaf"}, - {file = "tokenizers-0.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0305fc1ec6b1e5052d30d9c1d5c807081a7bd0cae46a33d03117082e91908c"}, - {file = "tokenizers-0.20.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5dc611e6ac0fa00a41de19c3bf6391a05ea201d2d22b757d63f5491ec0e67faa"}, - {file = "tokenizers-0.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5ffe0d7f7bfcfa3b2585776ecf11da2e01c317027c8573c78ebcb8985279e23"}, - {file = "tokenizers-0.20.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e7edb8ec12c100d5458d15b1e47c0eb30ad606a05641f19af7563bc3d1608c14"}, - {file = "tokenizers-0.20.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:de291633fb9303555793cc544d4a86e858da529b7d0b752bcaf721ae1d74b2c9"}, - {file = "tokenizers-0.20.1.tar.gz", hash = "sha256:84edcc7cdeeee45ceedb65d518fffb77aec69311c9c8e30f77ad84da3025f002"}, + {file = "tokenizers-0.20.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:31ccab28dbb1a9fe539787210b0026e22debeab1662970f61c2d921f7557f7e4"}, + {file = "tokenizers-0.20.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6361191f762bda98c773da418cf511cbaa0cb8d0a1196f16f8c0119bde68ff8"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f128d5da1202b78fa0a10d8d938610472487da01b57098d48f7e944384362514"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:79c4121a2e9433ad7ef0769b9ca1f7dd7fa4c0cd501763d0a030afcbc6384481"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7850fde24197fe5cd6556e2fdba53a6d3bae67c531ea33a3d7c420b90904141"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b357970c095dc134978a68c67d845a1e3803ab7c4fbb39195bde914e7e13cf8b"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a333d878c4970b72d6c07848b90c05f6b045cf9273fc2bc04a27211721ad6118"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fd9fee817f655a8f50049f685e224828abfadd436b8ff67979fc1d054b435f1"}, + {file = "tokenizers-0.20.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9e7816808b402129393a435ea2a509679b41246175d6e5e9f25b8692bfaa272b"}, + {file = "tokenizers-0.20.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba96367db9d8a730d3a1d5996b4b7babb846c3994b8ef14008cd8660f55db59d"}, + {file = "tokenizers-0.20.3-cp310-none-win32.whl", hash = "sha256:ee31ba9d7df6a98619426283e80c6359f167e2e9882d9ce1b0254937dbd32f3f"}, + {file = "tokenizers-0.20.3-cp310-none-win_amd64.whl", hash = "sha256:a845c08fdad554fe0871d1255df85772f91236e5fd6b9287ef8b64f5807dbd0c"}, + {file = "tokenizers-0.20.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:585b51e06ca1f4839ce7759941e66766d7b060dccfdc57c4ca1e5b9a33013a90"}, + {file = "tokenizers-0.20.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61cbf11954f3b481d08723ebd048ba4b11e582986f9be74d2c3bdd9293a4538d"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef820880d5e4e8484e2fa54ff8d297bb32519eaa7815694dc835ace9130a3eea"}, + {file = 
"tokenizers-0.20.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:67ef4dcb8841a4988cd00dd288fb95dfc8e22ed021f01f37348fd51c2b055ba9"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff1ef8bd47a02b0dc191688ccb4da53600df5d4c9a05a4b68e1e3de4823e78eb"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:444d188186eab3148baf0615b522461b41b1f0cd58cd57b862ec94b6ac9780f1"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37c04c032c1442740b2c2d925f1857885c07619224a533123ac7ea71ca5713da"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453c7769d22231960ee0e883d1005c93c68015025a5e4ae56275406d94a3c907"}, + {file = "tokenizers-0.20.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4bb31f7b2847e439766aaa9cc7bccf7ac7088052deccdb2275c952d96f691c6a"}, + {file = "tokenizers-0.20.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:843729bf0f991b29655a069a2ff58a4c24375a553c70955e15e37a90dd4e045c"}, + {file = "tokenizers-0.20.3-cp311-none-win32.whl", hash = "sha256:efcce3a927b1e20ca694ba13f7a68c59b0bd859ef71e441db68ee42cf20c2442"}, + {file = "tokenizers-0.20.3-cp311-none-win_amd64.whl", hash = "sha256:88301aa0801f225725b6df5dea3d77c80365ff2362ca7e252583f2b4809c4cc0"}, + {file = "tokenizers-0.20.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:49d12a32e190fad0e79e5bdb788d05da2f20d8e006b13a70859ac47fecf6ab2f"}, + {file = "tokenizers-0.20.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:282848cacfb9c06d5e51489f38ec5aa0b3cd1e247a023061945f71f41d949d73"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe4e08c7d0cd6154c795deb5bf81d2122f36daf075e0c12a8b050d824ef0a64"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:ca94fc1b73b3883c98f0c88c77700b13d55b49f1071dfd57df2b06f3ff7afd64"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef279c7e239f95c8bdd6ff319d9870f30f0d24915b04895f55b1adcf96d6c60d"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16384073973f6ccbde9852157a4fdfe632bb65208139c9d0c0bd0176a71fd67f"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:312d522caeb8a1a42ebdec87118d99b22667782b67898a76c963c058a7e41d4f"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b7cb962564785a83dafbba0144ecb7f579f1d57d8c406cdaa7f32fe32f18ad"}, + {file = "tokenizers-0.20.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:124c5882ebb88dadae1fc788a582299fcd3a8bd84fc3e260b9918cf28b8751f5"}, + {file = "tokenizers-0.20.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2b6e54e71f84c4202111a489879005cb14b92616a87417f6c102c833af961ea2"}, + {file = "tokenizers-0.20.3-cp312-none-win32.whl", hash = "sha256:83d9bfbe9af86f2d9df4833c22e94d94750f1d0cd9bfb22a7bb90a86f61cdb1c"}, + {file = "tokenizers-0.20.3-cp312-none-win_amd64.whl", hash = "sha256:44def74cee574d609a36e17c8914311d1b5dbcfe37c55fd29369d42591b91cf2"}, + {file = "tokenizers-0.20.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0b630e0b536ef0e3c8b42c685c1bc93bd19e98c0f1543db52911f8ede42cf84"}, + {file = "tokenizers-0.20.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a02d160d2b19bcbfdf28bd9a4bf11be4cb97d0499c000d95d4c4b1a4312740b6"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e3d80d89b068bc30034034b5319218c7c0a91b00af19679833f55f3becb6945"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:174a54910bed1b089226512b4458ea60d6d6fd93060254734d3bc3540953c51c"}, + {file = 
"tokenizers-0.20.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:098b8a632b8656aa5802c46689462c5c48f02510f24029d71c208ec2c822e771"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78c8c143e3ae41e718588281eb3e212c2b31623c9d6d40410ec464d7d6221fb5"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b26b0aadb18cd8701077362ba359a06683662d5cafe3e8e8aba10eb05c037f1"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07d7851a72717321022f3774e84aa9d595a041d643fafa2e87fbc9b18711dac0"}, + {file = "tokenizers-0.20.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bd44e48a430ada902c6266a8245f5036c4fe744fcb51f699999fbe82aa438797"}, + {file = "tokenizers-0.20.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a4c186bb006ccbe1f5cc4e0380d1ce7806f5955c244074fd96abc55e27b77f01"}, + {file = "tokenizers-0.20.3-cp313-none-win32.whl", hash = "sha256:6e19e0f1d854d6ab7ea0c743d06e764d1d9a546932be0a67f33087645f00fe13"}, + {file = "tokenizers-0.20.3-cp313-none-win_amd64.whl", hash = "sha256:d50ede425c7e60966a9680d41b58b3a0950afa1bb570488e2972fa61662c4273"}, + {file = "tokenizers-0.20.3-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:9adda1ff5fb9dcdf899ceca672a4e2ce9e797adb512a6467305ca3d8bfcfbdd0"}, + {file = "tokenizers-0.20.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:6dde2cae6004ba7a3badff4a11911cae03ebf23e97eebfc0e71fef2530e5074f"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4a7fd678b35614fca708579eb95b7587a5e8a6d328171bd2488fd9f27d82be4"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b80e3c7283a01a356bd2210f53d1a4a5d32b269c2024389ed0173137708d50e"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:a8cc0e8176b762973758a77f0d9c4467d310e33165fb74173418ca3734944da4"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5634b2e2f5f3d2b4439d2d74066e22eb4b1f04f3fea05cb2a3c12d89b5a3bcd"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b4ba635165bc1ea46f2da8e5d80b5f70f6ec42161e38d96dbef33bb39df73964"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18e4c7c64172e7789bd8b07aa3087ea87c4c4de7e90937a2aa036b5d92332536"}, + {file = "tokenizers-0.20.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1f74909ef7675c26d4095a817ec3393d67f3158ca4836c233212e5613ef640c4"}, + {file = "tokenizers-0.20.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0e9b81321a1e05b16487d312b4264984513f8b4a7556229cafac6e88c2036b09"}, + {file = "tokenizers-0.20.3-cp37-none-win32.whl", hash = "sha256:ab48184cd58b4a03022a2ec75b54c9f600ffea9a733612c02325ed636f353729"}, + {file = "tokenizers-0.20.3-cp37-none-win_amd64.whl", hash = "sha256:60ac483cebee1c12c71878523e768df02fa17e4c54412966cb3ac862c91b36c1"}, + {file = "tokenizers-0.20.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:3229ef103c89583d10b9378afa5d601b91e6337530a0988e17ca8d635329a996"}, + {file = "tokenizers-0.20.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6ac52cc24bad3de865c7e65b1c4e7b70d00938a8ae09a92a453b8f676e714ad5"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04627b7b502fa6a2a005e1bd446fa4247d89abcb1afaa1b81eb90e21aba9a60f"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c27ceb887f0e81a3c377eb4605dca7a95a81262761c0fba308d627b2abb98f2b"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65ab780194da4e1fcf5670523a2f377c4838ebf5249efe41fa1eddd2a84fb49d"}, + {file = 
"tokenizers-0.20.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98d343134f47159e81f7f242264b0eb222e6b802f37173c8d7d7b64d5c9d1388"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2475bb004ab2009d29aff13b5047bfdb3d4b474f0aa9d4faa13a7f34dbbbb43"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b6583a65c01db1197c1eb36857ceba8ec329d53afadd268b42a6b04f4965724"}, + {file = "tokenizers-0.20.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:62d00ba208358c037eeab7bfc00a905adc67b2d31b68ab40ed09d75881e114ea"}, + {file = "tokenizers-0.20.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0fc7a39e5bedc817bda395a798dfe2d9c5f7c71153c90d381b5135a0328d9520"}, + {file = "tokenizers-0.20.3-cp38-none-win32.whl", hash = "sha256:84d40ee0f8550d64d3ea92dd7d24a8557a9172165bdb986c9fb2503b4fe4e3b6"}, + {file = "tokenizers-0.20.3-cp38-none-win_amd64.whl", hash = "sha256:205a45246ed7f1718cf3785cff88450ba603352412aaf220ace026384aa3f1c0"}, + {file = "tokenizers-0.20.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:93e37f0269a11dc3b1a953f1fca9707f0929ebf8b4063c591c71a0664219988e"}, + {file = "tokenizers-0.20.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f4cb0c614b0135e781de96c2af87e73da0389ac1458e2a97562ed26e29490d8d"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7eb2fb1c432f5746b22f8a7f09fc18c4156cb0031c77f53cb19379d82d43297a"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfa8d029bb156181b006643309d6b673615a24e4ed24cf03aa191d599b996f51"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f90549622de3bf476ad9f1dd6f3f952ec3ed6ab8615ae88ef060d0c5bfad55d"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:a1d469c74eebf5c43fd61cd9b030e271d17198edd7bd45392e03a3c091d7d6d4"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bee8f53b2594749f4460d53253bae55d718f04e9b633efa0f5df8938bd98e4f0"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:938441babf3e5720e4459e306ef2809fb267680df9d1ff2873458b22aef60248"}, + {file = "tokenizers-0.20.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7310ab23d7b0caebecc0e8be11a1146f320f5f07284000f6ea54793e83de1b75"}, + {file = "tokenizers-0.20.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:16121eb030a2b13094cfec936b0c12e8b4063c5f839591ea7d0212336d8f9921"}, + {file = "tokenizers-0.20.3-cp39-none-win32.whl", hash = "sha256:401cc21ef642ee235985d747f65e18f639464d377c70836c9003df208d582064"}, + {file = "tokenizers-0.20.3-cp39-none-win_amd64.whl", hash = "sha256:7498f3ea7746133335a6adb67a77cf77227a8b82c8483f644a2e5f86fea42b8d"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e919f2e3e68bb51dc31de4fcbbeff3bdf9c1cad489044c75e2b982a91059bd3c"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b8e9608f2773996cc272156e305bd79066163a66b0390fe21750aff62df1ac07"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39270a7050deaf50f7caff4c532c01b3c48f6608d42b3eacdebdc6795478c8df"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e005466632b1c5d2d2120f6de8aa768cc9d36cd1ab7d51d0c27a114c91a1e6ee"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a07962340b36189b6c8feda552ea1bfeee6cf067ff922a1d7760662c2ee229e5"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:55046ad3dd5f2b3c67501fcc8c9cbe3e901d8355f08a3b745e9b57894855f85b"}, + 
{file = "tokenizers-0.20.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:efcf0eb939988b627558aaf2b9dc3e56d759cad2e0cfa04fcab378e4b48fc4fd"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f3558a7ae6a6d38a77dfce12172a1e2e1bf3e8871e744a1861cd7591ea9ebe24"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d53029fe44bc70c3ff14ef512460a0cf583495a0f8e2f4b70e26eb9438e38a9"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a2a56397b2bec5a629b516b23f0f8a3e4f978c7488d4a299980f8375954b85"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e5bfaae740ef9ece000f8a07e78ac0e2b085c5ce9648f8593ddf0243c9f76d"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fbaf3ea28fedfb2283da60e710aff25492e795a7397cad8a50f1e079b65a5a70"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c47c037116310dc976eb96b008e41b9cfaba002ed8005848d4d632ee0b7ba9ae"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c31751f0721f58f5e19bb27c1acc259aeff860d8629c4e1a900b26a1979ada8e"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:c697cbd3be7a79ea250ea5f380d6f12e534c543cfb137d5c734966b3ee4f34cc"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b48971b88ef9130bf35b41b35fd857c3c4dae4a9cd7990ebc7fc03e59cc92438"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e615de179bbe060ab33773f0d98a8a8572b5883dd7dac66c1de8c056c7e748c"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da1ec842035ed9999c62e45fbe0ff14b7e8a7e02bb97688cc6313cf65e5cd755"}, + 
{file = "tokenizers-0.20.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:6ee4954c1dd23aadc27958dad759006e71659d497dcb0ef0c7c87ea992c16ebd"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3eda46ca402751ec82553a321bf35a617b76bbed7586e768c02ccacbdda94d6d"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:de082392a85eb0055cc055c535bff2f0cc15d7a000bdc36fbf601a0f3cf8507a"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c3db46cc0647bfd88263afdb739b92017a02a87ee30945cb3e86c7e25c7c9917"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a292392f24ab9abac5cfa8197e5a6208f2e43723420217e1ceba0b4ec77816ac"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dcd91f4e60f62b20d83a87a84fe062035a1e3ff49a8c2bbdeb2d441c8e311f4"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:900991a2b8ee35961b1095db7e265342e0e42a84c1a594823d5ee9f8fb791958"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:5a8d8261ca2133d4f98aa9627c748189502b3787537ba3d7e2beb4f7cfc5d627"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c4fd4d71e6deb6ddf99d8d0eab87d1d16f635898906e631914a9bae8ae9f2cfb"}, + {file = "tokenizers-0.20.3.tar.gz", hash = "sha256:2278b34c5d0dd78e087e1ca7f9b1dcbf129d80211afa645f214bd6e051037539"}, ] [package.dependencies] @@ -2076,13 +2294,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.6" +version = "4.67.0" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, - {file = "tqdm-4.66.6.tar.gz", hash = 
"sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, + {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"}, + {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"}, ] [package.dependencies] @@ -2090,19 +2308,20 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] [[package]] name = "typer" -version = "0.12.5" +version = "0.13.0" description = "Typer, build great CLIs. Easy to code. Based on Python type hints." optional = true python-versions = ">=3.7" files = [ - {file = "typer-0.12.5-py3-none-any.whl", hash = "sha256:62fe4e471711b147e3365034133904df3e235698399bc4de2b36c8579298d52b"}, - {file = "typer-0.12.5.tar.gz", hash = "sha256:f592f089bedcc8ec1b974125d64851029c3b1af145f04aca64d69410f0c9b722"}, + {file = "typer-0.13.0-py3-none-any.whl", hash = "sha256:d85fe0b777b2517cc99c8055ed735452f2659cd45e451507c76f48ce5c1d00e2"}, + {file = "typer-0.13.0.tar.gz", hash = "sha256:f1c7198347939361eec90139ffa0fd8b3df3a2259d5852a0f7400e476d95985c"}, ] [package.dependencies] @@ -2313,97 +2532,80 @@ anyio = ">=3.0.0" [[package]] name = "websockets" -version = "13.1" +version = "14.0" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "websockets-13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f48c749857f8fb598fb890a75f540e3221d0976ed0bf879cf3c7eef34151acee"}, - {file = "websockets-13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c7e72ce6bda6fb9409cc1e8164dd41d7c91466fb599eb047cfda72fe758a34a7"}, - {file = "websockets-13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:f779498eeec470295a2b1a5d97aa1bc9814ecd25e1eb637bd9d1c73a327387f6"}, - {file = "websockets-13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676df3fe46956fbb0437d8800cd5f2b6d41143b6e7e842e60554398432cf29b"}, - {file = "websockets-13.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7affedeb43a70351bb811dadf49493c9cfd1ed94c9c70095fd177e9cc1541fa"}, - {file = "websockets-13.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1971e62d2caa443e57588e1d82d15f663b29ff9dfe7446d9964a4b6f12c1e700"}, - {file = "websockets-13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5f2e75431f8dc4a47f31565a6e1355fb4f2ecaa99d6b89737527ea917066e26c"}, - {file = "websockets-13.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:58cf7e75dbf7e566088b07e36ea2e3e2bd5676e22216e4cad108d4df4a7402a0"}, - {file = "websockets-13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c90d6dec6be2c7d03378a574de87af9b1efea77d0c52a8301dd831ece938452f"}, - {file = "websockets-13.1-cp310-cp310-win32.whl", hash = "sha256:730f42125ccb14602f455155084f978bd9e8e57e89b569b4d7f0f0c17a448ffe"}, - {file = "websockets-13.1-cp310-cp310-win_amd64.whl", hash = "sha256:5993260f483d05a9737073be197371940c01b257cc45ae3f1d5d7adb371b266a"}, - {file = "websockets-13.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:61fc0dfcda609cda0fc9fe7977694c0c59cf9d749fbb17f4e9483929e3c48a19"}, - {file = "websockets-13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ceec59f59d092c5007e815def4ebb80c2de330e9588e101cf8bd94c143ec78a5"}, - {file = "websockets-13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1dca61c6db1166c48b95198c0b7d9c990b30c756fc2923cc66f68d17dc558fd"}, - {file = "websockets-13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:308e20f22c2c77f3f39caca508e765f8725020b84aa963474e18c59accbf4c02"}, - 
{file = "websockets-13.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62d516c325e6540e8a57b94abefc3459d7dab8ce52ac75c96cad5549e187e3a7"}, - {file = "websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c6e35319b46b99e168eb98472d6c7d8634ee37750d7693656dc766395df096"}, - {file = "websockets-13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5f9fee94ebafbc3117c30be1844ed01a3b177bb6e39088bc6b2fa1dc15572084"}, - {file = "websockets-13.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7c1e90228c2f5cdde263253fa5db63e6653f1c00e7ec64108065a0b9713fa1b3"}, - {file = "websockets-13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6548f29b0e401eea2b967b2fdc1c7c7b5ebb3eeb470ed23a54cd45ef078a0db9"}, - {file = "websockets-13.1-cp311-cp311-win32.whl", hash = "sha256:c11d4d16e133f6df8916cc5b7e3e96ee4c44c936717d684a94f48f82edb7c92f"}, - {file = "websockets-13.1-cp311-cp311-win_amd64.whl", hash = "sha256:d04f13a1d75cb2b8382bdc16ae6fa58c97337253826dfe136195b7f89f661557"}, - {file = "websockets-13.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9d75baf00138f80b48f1eac72ad1535aac0b6461265a0bcad391fc5aba875cfc"}, - {file = "websockets-13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9b6f347deb3dcfbfde1c20baa21c2ac0751afaa73e64e5b693bb2b848efeaa49"}, - {file = "websockets-13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de58647e3f9c42f13f90ac7e5f58900c80a39019848c5547bc691693098ae1bd"}, - {file = "websockets-13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1b54689e38d1279a51d11e3467dd2f3a50f5f2e879012ce8f2d6943f00e83f0"}, - {file = "websockets-13.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf1781ef73c073e6b0f90af841aaf98501f975d306bbf6221683dd594ccc52b6"}, - {file = 
"websockets-13.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d23b88b9388ed85c6faf0e74d8dec4f4d3baf3ecf20a65a47b836d56260d4b9"}, - {file = "websockets-13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3c78383585f47ccb0fcf186dcb8a43f5438bd7d8f47d69e0b56f71bf431a0a68"}, - {file = "websockets-13.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d6d300f8ec35c24025ceb9b9019ae9040c1ab2f01cddc2bcc0b518af31c75c14"}, - {file = "websockets-13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a9dcaf8b0cc72a392760bb8755922c03e17a5a54e08cca58e8b74f6902b433cf"}, - {file = "websockets-13.1-cp312-cp312-win32.whl", hash = "sha256:2f85cf4f2a1ba8f602298a853cec8526c2ca42a9a4b947ec236eaedb8f2dc80c"}, - {file = "websockets-13.1-cp312-cp312-win_amd64.whl", hash = "sha256:38377f8b0cdeee97c552d20cf1865695fcd56aba155ad1b4ca8779a5b6ef4ac3"}, - {file = "websockets-13.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a9ab1e71d3d2e54a0aa646ab6d4eebfaa5f416fe78dfe4da2839525dc5d765c6"}, - {file = "websockets-13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b9d7439d7fab4dce00570bb906875734df13d9faa4b48e261c440a5fec6d9708"}, - {file = "websockets-13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:327b74e915cf13c5931334c61e1a41040e365d380f812513a255aa804b183418"}, - {file = "websockets-13.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:325b1ccdbf5e5725fdcb1b0e9ad4d2545056479d0eee392c291c1bf76206435a"}, - {file = "websockets-13.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:346bee67a65f189e0e33f520f253d5147ab76ae42493804319b5716e46dddf0f"}, - {file = "websockets-13.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a0fa841646320ec0d3accdff5b757b06e2e5c86ba32af2e0815c96c7a603c5"}, - {file = 
"websockets-13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:18503d2c5f3943e93819238bf20df71982d193f73dcecd26c94514f417f6b135"}, - {file = "websockets-13.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a9cd1af7e18e5221d2878378fbc287a14cd527fdd5939ed56a18df8a31136bb2"}, - {file = "websockets-13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:70c5be9f416aa72aab7a2a76c90ae0a4fe2755c1816c153c1a2bcc3333ce4ce6"}, - {file = "websockets-13.1-cp313-cp313-win32.whl", hash = "sha256:624459daabeb310d3815b276c1adef475b3e6804abaf2d9d2c061c319f7f187d"}, - {file = "websockets-13.1-cp313-cp313-win_amd64.whl", hash = "sha256:c518e84bb59c2baae725accd355c8dc517b4a3ed8db88b4bc93c78dae2974bf2"}, - {file = "websockets-13.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c7934fd0e920e70468e676fe7f1b7261c1efa0d6c037c6722278ca0228ad9d0d"}, - {file = "websockets-13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:149e622dc48c10ccc3d2760e5f36753db9cacf3ad7bc7bbbfd7d9c819e286f23"}, - {file = "websockets-13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a569eb1b05d72f9bce2ebd28a1ce2054311b66677fcd46cf36204ad23acead8c"}, - {file = "websockets-13.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95df24ca1e1bd93bbca51d94dd049a984609687cb2fb08a7f2c56ac84e9816ea"}, - {file = "websockets-13.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8dbb1bf0c0a4ae8b40bdc9be7f644e2f3fb4e8a9aca7145bfa510d4a374eeb7"}, - {file = "websockets-13.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:035233b7531fb92a76beefcbf479504db8c72eb3bff41da55aecce3a0f729e54"}, - {file = "websockets-13.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e4450fc83a3df53dec45922b576e91e94f5578d06436871dce3a6be38e40f5db"}, - {file = "websockets-13.1-cp38-cp38-musllinux_1_2_i686.whl", hash = 
"sha256:463e1c6ec853202dd3657f156123d6b4dad0c546ea2e2e38be2b3f7c5b8e7295"}, - {file = "websockets-13.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6d6855bbe70119872c05107e38fbc7f96b1d8cb047d95c2c50869a46c65a8e96"}, - {file = "websockets-13.1-cp38-cp38-win32.whl", hash = "sha256:204e5107f43095012b00f1451374693267adbb832d29966a01ecc4ce1db26faf"}, - {file = "websockets-13.1-cp38-cp38-win_amd64.whl", hash = "sha256:485307243237328c022bc908b90e4457d0daa8b5cf4b3723fd3c4a8012fce4c6"}, - {file = "websockets-13.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9b37c184f8b976f0c0a231a5f3d6efe10807d41ccbe4488df8c74174805eea7d"}, - {file = "websockets-13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:163e7277e1a0bd9fb3c8842a71661ad19c6aa7bb3d6678dc7f89b17fbcc4aeb7"}, - {file = "websockets-13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4b889dbd1342820cc210ba44307cf75ae5f2f96226c0038094455a96e64fb07a"}, - {file = "websockets-13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:586a356928692c1fed0eca68b4d1c2cbbd1ca2acf2ac7e7ebd3b9052582deefa"}, - {file = "websockets-13.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7bd6abf1e070a6b72bfeb71049d6ad286852e285f146682bf30d0296f5fbadfa"}, - {file = "websockets-13.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2aad13a200e5934f5a6767492fb07151e1de1d6079c003ab31e1823733ae79"}, - {file = "websockets-13.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:df01aea34b6e9e33572c35cd16bae5a47785e7d5c8cb2b54b2acdb9678315a17"}, - {file = "websockets-13.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e54affdeb21026329fb0744ad187cf812f7d3c2aa702a5edb562b325191fcab6"}, - {file = "websockets-13.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9ef8aa8bdbac47f4968a5d66462a2a0935d044bf35c0e5a8af152d58516dbeb5"}, - {file = "websockets-13.1-cp39-cp39-win32.whl", 
hash = "sha256:deeb929efe52bed518f6eb2ddc00cc496366a14c726005726ad62c2dd9017a3c"}, - {file = "websockets-13.1-cp39-cp39-win_amd64.whl", hash = "sha256:7c65ffa900e7cc958cd088b9a9157a8141c991f8c53d11087e6fb7277a03f81d"}, - {file = "websockets-13.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5dd6da9bec02735931fccec99d97c29f47cc61f644264eb995ad6c0c27667238"}, - {file = "websockets-13.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2510c09d8e8df777177ee3d40cd35450dc169a81e747455cc4197e63f7e7bfe5"}, - {file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1c3cf67185543730888b20682fb186fc8d0fa6f07ccc3ef4390831ab4b388d9"}, - {file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcc03c8b72267e97b49149e4863d57c2d77f13fae12066622dc78fe322490fe6"}, - {file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004280a140f220c812e65f36944a9ca92d766b6cc4560be652a0a3883a79ed8a"}, - {file = "websockets-13.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e2620453c075abeb0daa949a292e19f56de518988e079c36478bacf9546ced23"}, - {file = "websockets-13.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9156c45750b37337f7b0b00e6248991a047be4aa44554c9886fe6bdd605aab3b"}, - {file = "websockets-13.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:80c421e07973a89fbdd93e6f2003c17d20b69010458d3a8e37fb47874bd67d51"}, - {file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82d0ba76371769d6a4e56f7e83bb8e81846d17a6190971e38b5de108bde9b0d7"}, - {file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9875a0143f07d74dc5e1ded1c4581f0d9f7ab86c78994e2ed9e95050073c94d"}, - {file = 
"websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a11e38ad8922c7961447f35c7b17bffa15de4d17c70abd07bfbe12d6faa3e027"}, - {file = "websockets-13.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4059f790b6ae8768471cddb65d3c4fe4792b0ab48e154c9f0a04cefaabcd5978"}, - {file = "websockets-13.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:25c35bf84bf7c7369d247f0b8cfa157f989862c49104c5cf85cb5436a641d93e"}, - {file = "websockets-13.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:83f91d8a9bb404b8c2c41a707ac7f7f75b9442a0a876df295de27251a856ad09"}, - {file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a43cfdcddd07f4ca2b1afb459824dd3c6d53a51410636a2c7fc97b9a8cf4842"}, - {file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48a2ef1381632a2f0cb4efeff34efa97901c9fbc118e01951ad7cfc10601a9bb"}, - {file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:459bf774c754c35dbb487360b12c5727adab887f1622b8aed5755880a21c4a20"}, - {file = "websockets-13.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:95858ca14a9f6fa8413d29e0a585b31b278388aa775b8a81fa24830123874678"}, - {file = "websockets-13.1-py3-none-any.whl", hash = "sha256:a9a396a6ad26130cdae92ae10c36af09d9bfe6cafe69670fd3b6da9b07b4044f"}, - {file = "websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878"}, + {file = "websockets-14.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:064a72c0602c2d2c2586143561e0f179ef9b98e0825dc4a3d5cdf55a81898ed6"}, + {file = "websockets-14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9dc5a2726fd16c266d35838db086fa4e621bb049e3bbe498ab9d54ad5068f726"}, + {file = 
"websockets-14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1e541e4c8983b118a584c306070878e7f9670b7781e04184b6e05f9fc92e8a0e"}, + {file = "websockets-14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23b13edb4df2d4e5d6dc747d83e6b244e267a6615ede90f18ef13dfb2b6feb87"}, + {file = "websockets-14.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:288365a33049dae3065cdb2c2dd4b48df4b64839c565761c4f3f0c360460a561"}, + {file = "websockets-14.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79e2494047826a56f2951b2ada9dc139d2c3aff63122e86953cafe64ac0fde75"}, + {file = "websockets-14.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5a5b76b47b62de16d26439d362b18d71394ca4376eb2c8838352be64b27ba8af"}, + {file = "websockets-14.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:7ed4111f305770e35070e49fbb9fbf757a9b6c9a31bb86d352eb4031d4aa976f"}, + {file = "websockets-14.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9af48a2f4cc5e2e34cf69969079865100e418c27caa26c1e3369efcc20c81e17"}, + {file = "websockets-14.0-cp310-cp310-win32.whl", hash = "sha256:a97c10043bf74d7667be69383312007d54a507fac8fa101be492cc91e279d94d"}, + {file = "websockets-14.0-cp310-cp310-win_amd64.whl", hash = "sha256:5f86250ee98f6098479936b7d596418b6e4c919dfa156508e9d6ac5f8bfbe764"}, + {file = "websockets-14.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3c12e6c1331ee8833fcb565c033f7eb4cb5642af37cef81211c222b617b170df"}, + {file = "websockets-14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:445a53bce8344e62df4ed9a22fdd1f06cad8e404ead64b2a1f19bd826c8dad1b"}, + {file = "websockets-14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3e4be641fed120790241ae15fde27374a62cadaadcc0bd2b4ce35790bd284fb6"}, + {file = "websockets-14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b886b6d14cd089396155e6beb2935268bf995057bf24c3e5fd609af55c584a03"}, + {file = "websockets-14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b8a85d62709a86a9a55d4720502e88968483ee7f365bd852b75935dec04e0d"}, + {file = "websockets-14.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08d62f438a591c016c5d4c79eaf9a8f7a85b6c3ea88793d676c00c930a41e775"}, + {file = "websockets-14.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:189e9f074f2a77f7cf54634797b29be28116ee564ece421c7653030a2cef48f0"}, + {file = "websockets-14.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0b406f2387dbaf301996b7b2cf41519c1fbba7d5c9626406dd56f72075a60a00"}, + {file = "websockets-14.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a3741f4394ba3d55a64949ee11ffdba19e2a2bdaa1319a96a7ab93bf8bd2b9b2"}, + {file = "websockets-14.0-cp311-cp311-win32.whl", hash = "sha256:b639ea88a46f4629645b398c9e7be0366c92e4910203a6314f78469f5e631dc5"}, + {file = "websockets-14.0-cp311-cp311-win_amd64.whl", hash = "sha256:715b238c1772ed28b98af8830df41c5d68941729e22384fe1433db495b1d5438"}, + {file = "websockets-14.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f988f141a9be7a74d2e98d446b2f5411038bad14cdab80f9d1644b2329a71b48"}, + {file = "websockets-14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7fd212e7022c70b4f8246dee4449dde30ff50c7e8e1d61ac87b7879579badd03"}, + {file = "websockets-14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4c06f014fd8fa3827e5fd03ec012945e2139901f261fcc401e0622476cad9c5c"}, + {file = "websockets-14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fad8f03dc976e710db785abf9deb76eb259312fb54d77b568c73f0162cef96e"}, + {file = "websockets-14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6cff048a155024a580fee9f9a66b0ad9fc82683f6470c26eb76dd9280e6f459e"}, + {file = "websockets-14.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56ec8098dcc47817c8aee8037165f0fe30fec8efe543c66e0924781a4bfcbdfd"}, + {file = "websockets-14.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee5fb667aec4ae723d40ada9854128df427b35b526c600cd352ca0240aad4dd7"}, + {file = "websockets-14.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:2752c98237057f27594a8393d498edd9db37e06abcfb99176d9cb6fb989dc883"}, + {file = "websockets-14.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9ff528498d9e5c543bee388023ca91870678ac50724d675853ba85b4f0a459e"}, + {file = "websockets-14.0-cp312-cp312-win32.whl", hash = "sha256:8982909857b09220ee31d9a45699fce26f8e5b94a10efa7fe07004d4f4200a33"}, + {file = "websockets-14.0-cp312-cp312-win_amd64.whl", hash = "sha256:61b60c2a07b6d25f7ce8cc0101d55fb0f1af388bec1eddfe0181085c2206e7b0"}, + {file = "websockets-14.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7cf000319db10a0cb5c7ce91bfd2a8699086b5cc0b5c5b83b92eec22a0448b2f"}, + {file = "websockets-14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0bae3caf386d418e83b62e8c1c4cec1b13348fac43e530b9894d6c7c02d921b5"}, + {file = "websockets-14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8eb46ac94d5c131336dc997a568f5579501958b14a507e6aa4840f6d856da980"}, + {file = "websockets-14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12c345585b1da70cd27a298b0b9a81aa18da7a690672f771b427db59c632d8aa"}, + {file = "websockets-14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81758da7c76b4e2ddabc4a98a51f3c3aca8585a6d3a8662b5061613303bd5f68"}, + {file = "websockets-14.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4eae86193fd667667f35367d292b912685cb22c3f9f1dd6deaa3fdd713ab5976"}, + {file = "websockets-14.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7078dd0eac3a1dccf2c6f474004dbe8a4e936dbd19d37bbfb6efa70c923ae04e"}, + {file = "websockets-14.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2a418d596536a470f6f8e94cbb1fde66fe65e03d68c403eee0f2198b129e139a"}, + {file = "websockets-14.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d66eeab61956e231f35659e6d5b66dc04a3d51e65f2b8f71862dc6a8ba710d1"}, + {file = "websockets-14.0-cp313-cp313-win32.whl", hash = "sha256:b24f7286a5c4e350284623cf708662f0881fe7bc1146c1a1fe7e6a9be01a8d6b"}, + {file = "websockets-14.0-cp313-cp313-win_amd64.whl", hash = "sha256:fb260539dd2b64e93c9f2c59caa70d36d2020fb8e26fa17f62459ad50ebf6c24"}, + {file = "websockets-14.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0913596e0072202be8729dab05266398b72ee57c4232f48d52fe2a0370d0b53f"}, + {file = "websockets-14.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f2e7710f3c468519f9d5b01a291c407f809f8f831e5a204b238e02447046d78"}, + {file = "websockets-14.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ae0e14729038208711d2e2f769280621c22cd253e3dac00f809fa38c6ccb79d"}, + {file = "websockets-14.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4875d1c3ab3d1d9a9d8485dc1f4c2aaa63947824af03301911ea58d1e881e096"}, + {file = "websockets-14.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:678990bc5a1e4fa36e18d340d439079a21e6b8d249848b7066cad1a6cbd34b82"}, + {file = "websockets-14.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaf3b31f8343dcc6c20d068c10eb29325dd70f5dc321ebb5fbeaa280436e70e"}, + {file = "websockets-14.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:633bbda2d30bc695900f6a07de4e5d92a4e8e8d0d8a536bb3c2051bee4dc3856"}, + {file = 
"websockets-14.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1c4ca7cc5a02f909789dad259dffe61be4f38ffb26dc5e26ab2dca2c7d7c87de"}, + {file = "websockets-14.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5ade11f4939b885303d28b53d512e96e1a8ea8fbebedd6fef3e2e1afe633cc2a"}, + {file = "websockets-14.0-cp39-cp39-win32.whl", hash = "sha256:281b5ab9514eb241e347a46367a2374cb60cf8f420c4283948aa188f05e7810c"}, + {file = "websockets-14.0-cp39-cp39-win_amd64.whl", hash = "sha256:72fe11675685412917363481b79c56e68175e62352f84ca4788ac264f9ea6ed0"}, + {file = "websockets-14.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3f1a697262e28682222f18fae70eb0800dfa50c6eb96b0561c6beb83d6cf78ca"}, + {file = "websockets-14.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e0e543e0e81c55e68552bd3c081282721c710a6379a2a78e1ec793853479b25"}, + {file = "websockets-14.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2786c74cbcb0263fd541e4a075aa8c932bdcaa91e5bbb8649c65304799acdd64"}, + {file = "websockets-14.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:176b39547950ff3520728bd1eadd0fa02c68492a1fabca636bab7883dd390905"}, + {file = "websockets-14.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86626d560ceb9d846d128b9c7bd2d0f247dbb62fb49c386762d109583140bf48"}, + {file = "websockets-14.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ca447967131023e98fcb4867f05cf8584adb424b9108180b2414745a6ff41c31"}, + {file = "websockets-14.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c4eb304743ab285f8f057344d115259fbe31e42151b9aae7610db83d2a7379b1"}, + {file = "websockets-14.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:cc7dbe53276429b2ca511a04a3979ce27aa2088fdd28c119c6913dccdfd0e909"}, + {file = 
"websockets-14.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6dd785f7a521189b1233d3c86c0b66fb73d4769a1d253ce5b31081c5946f05f"}, + {file = "websockets-14.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:77697c303b874daf1c76d4e167cd5d6871c26964bc189e4bdb40427067d53a86"}, + {file = "websockets-14.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20979614e4d7266f15018c154255d35dfb9fc828fdf6b4924166b6728fed359f"}, + {file = "websockets-14.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3fb3d9e3940ea15b30404200e768e6111c3ee2956c60ceb001cae057961ab058"}, + {file = "websockets-14.0-py3-none-any.whl", hash = "sha256:1a3bca8cfb66614e23a65aa5d6b87190876ec6f3247094939f9db877db55319c"}, + {file = "websockets-14.0.tar.gz", hash = "sha256:be90aa6dab180fed523c0c10a6729ad16c9ba79067402d01a4d8aa7ce48d4084"}, ] [[package]] @@ -2430,6 +2632,7 @@ all = ["anthropic", "fastapi", "groq", "sqlmodel", "uvicorn"] anthropic = ["anthropic"] api-server = ["fastapi", "uvicorn"] groq = ["groq"] +minio = ["minio"] mqtt = ["aiomqtt"] postgres = ["psycopg2", "sqlmodel"] sqlite = ["sqlmodel"] @@ -2438,4 +2641,4 @@ studio = ["fastapi", "sqlmodel", "uvicorn"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "5e05acc71c0666a38c06b7837fa7ecb02e5615dd4bfd5ed7573541730e904456" +content-hash = "72237a6518efe3e6c572aaa24221ddb9da6a7cecd0eb6f1b630af29e542f8ca6" diff --git a/pyproject.toml b/pyproject.toml index 10382a088..56cd9838a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ groq = { version = "^0.11.0", optional = true } # Storage psycopg2 = { version = ">=2.7", optional = true } sqlmodel = { version = ">=0.0.21, <0.1.0", optional = true } +minio = { version = "7.2.10", optional = true } # Studio fastapi = { version = "^0.111.1", optional = true } uvicorn = { version = "^0.30.3", 
optional = true } @@ -65,6 +66,7 @@ postgres = ['sqlmodel', 'psycopg2'] studio = ['fastapi', 'uvicorn', 'sqlmodel'] api-server = ["fastapi", "uvicorn"] mqtt = ["aiomqtt"] +minio = ["minio"] all = [ "anthropic", "groq", diff --git a/src/ell/api/__main__.py b/src/ell/api/__main__.py index acdfe4089..f86ae22ed 100644 --- a/src/ell/api/__main__.py +++ b/src/ell/api/__main__.py @@ -13,21 +13,41 @@ def main(): - log_level = os.environ.get("LOG_LEVEL", logging.INFO) + log_level = int(os.environ.get("LOG_LEVEL", logging.INFO)) setup_logging(level=log_level) - parser = ArgumentParser(description="ELL API Server") - parser.add_argument("--storage-dir", default=None, - help="Storage directory (default: None)") - parser.add_argument("--pg-connection-string", default=None, - help="PostgreSQL connection string (default: None)") - parser.add_argument("--mqtt-connection-string", default=None, - help="MQTT connection string (default: None)") - parser.add_argument("--host", default=None, - help="Host to run the server on") - parser.add_argument("--port", type=int, default=None, - help="Port to run the server on") - parser.add_argument("--dev", action="store_true", + parser = ArgumentParser(description="ell api") + parser.add_argument("--storage-dir", + type=str, + default=os.getenv("ELL_STORAGE_DIR"), + help="Storage directory (default: None, env: ELL_STORAGE_DIR)") + parser.add_argument("--pg-connection-string", + default=os.getenv("ELL_PG_CONNECTION_STRING"), + help="PostgreSQL connection string (default: None, env: ELL_PG_CONNECTION_STRING)") + parser.add_argument("--mqtt-connection-string", + default=os.getenv("ELL_MQTT_CONNECTION_STRING"), + help="MQTT connection string (default: None, env: ELL_MQTT_CONNECTION_STRING)") + parser.add_argument("--minio-endpoint", + default=os.getenv("ELL_MINIO_ENDPOINT"), + help="MinIO endpoint (default: None, env: ELL_MINIO_ENDPOINT)") + parser.add_argument("--minio-access-key", + default=os.getenv("ELL_MINIO_ACCESS_KEY"), + help="MinIO access 
key (default: None, env: ELL_MINIO_ACCESS_KEY)") + parser.add_argument("--minio-secret-key", + default=os.getenv("ELL_MINIO_SECRET_KEY"), + help="MinIO secret key (default: None, env: ELL_MINIO_SECRET_KEY)") + parser.add_argument("--minio-bucket", + default=os.getenv("ELL_MINIO_BUCKET"), + help="MinIO bucket (default: None, env: ELL_MINIO_BUCKET)") + parser.add_argument("--host", + default=os.getenv("ELL_API_HOST") or "0.0.0.0", + help="Host to run the server on (default: '0.0.0.0', env: ELL_API_HOST)") + parser.add_argument("--port", + type=int, + default=int(os.getenv("ELL_API_PORT") or 8081), + help="Port to run the server on (default: 8081, env: ELL_API_PORT)") + parser.add_argument("--dev", + action="store_true", help="Run in development mode") args = parser.parse_args() @@ -35,6 +55,10 @@ def main(): storage_dir=args.storage_dir, pg_connection_string=args.pg_connection_string, mqtt_connection_string=args.mqtt_connection_string, + minio_endpoint=args.minio_endpoint, + minio_access_key=args.minio_access_key, + minio_secret_key=args.minio_secret_key, + minio_bucket=args.minio_bucket, ) app = create_app(config) @@ -43,8 +67,8 @@ def main(): config = uvicorn.Config( app=app, - host=args.host if args.host else os.environ.get("HOST", "0.0.0.0"), - port=args.port if args.port else int(os.environ.get("PORT", 8081)), + host=args.host, + port=args.port, loop=loop # type: ignore ) server = uvicorn.Server(config) diff --git a/src/ell/api/client/abc.py b/src/ell/api/client/abc.py deleted file mode 100644 index b8abdda2d..000000000 --- a/src/ell/api/client/abc.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Any, Dict, Optional, Protocol, List -# todo. check this does not cause circularity -from ell.types.serialize import LMP, GetLMPResponse, WriteLMPInput, WriteInvocationInput - - -class EllClient(Protocol): - async def get_lmp(self, lmp_id: str) -> GetLMPResponse: - ... - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - ... 
- - async def write_invocation(self, input: WriteInvocationInput) -> None: - ... - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - ... - - async def retrieve_blob(self, blob_id: str) -> bytes: - ... - - async def close(self): - ... - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - ... - - - diff --git a/src/ell/api/client/http.py b/src/ell/api/client/http.py deleted file mode 100644 index 08f977efd..000000000 --- a/src/ell/api/client/http.py +++ /dev/null @@ -1,73 +0,0 @@ -import logging -from typing import List, Optional, Dict, Any - -import httpx -from httpx import HTTPStatusError - -from ell.api.client.abc import EllClient -from ell.types.serialize import GetLMPResponse, WriteLMPInput, LMP, WriteInvocationInput - - -class EllAPIClient(EllClient): - def __init__(self, base_url: str): - self.base_url = base_url - self.client = httpx.AsyncClient(base_url=base_url) - - async def get_lmp(self, lmp_id: str) -> GetLMPResponse: - response = await self.client.get(f"/lmp/{lmp_id}") - response.raise_for_status() - data = response.json() - if data is None: - return None - return LMP(**data) - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - try: - response = await self.client.post("/lmp", json={ - "lmp": lmp.model_dump(mode="json"), - "uses": uses - }) - response.raise_for_status() - except HTTPStatusError as e: - if e.response.status_code == 422: - error_detail = e.response.json().get("detail", "No detailed error message provided") - logging.error(f"Unprocessable Entity (422) Error: {error_detail}") - raise ValueError(f"Invalid input: {error_detail}") from e - raise - - async def write_invocation(self, input: WriteInvocationInput) -> None: - response = await self.client.post( - "/invocation", - json=input.model_dump(mode="json") - ) - response.raise_for_status() - return None - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - response = await 
self.client.post("/blob", data={ - "blob": blob, - "metadata": metadata - }) - response.raise_for_status() - return response.json()["blob_id"] - - async def retrieve_blob(self, blob_id: str) -> bytes: - response = await self.client.get(f"/blob/{blob_id}") - response.raise_for_status() - return response.content - - async def close(self): - await self.client.aclose() - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - response = await self.client.get("/lmp/versions", params={"fqn": fqn}) - response.raise_for_status() - data = response.json() - return [LMP(**lmp_data) for lmp_data in data] - diff --git a/src/ell/api/client/postgres.py b/src/ell/api/client/postgres.py deleted file mode 100644 index 70146b071..000000000 --- a/src/ell/api/client/postgres.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import List, Optional, Dict, Any - -from ell.api.client.abc import EllClient -from ell.stores.sql import PostgresStore -from ell.stores.studio import SerializedLMP -from ell.types.serialize import LMP, WriteLMPInput, WriteInvocationInput - - -# Nb: these are async clients. maybe we want separate sync ones? 
-class EllPostgresClient(EllClient): - def __init__(self, db_uri: str): - self.store = PostgresStore(db_uri) - - async def get_lmp(self, lmp_id: str): - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - model = SerializedLMP.from_api(lmp) - self.store.write_lmp(model, uses) - - async def write_invocation(self, input: WriteInvocationInput) -> None: - invocation, consumes = input.to_serialized_invocation_input() - self.store.write_invocation( - invocation, - set(consumes) - ) - return None - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.store_blob(blob, metadata) - - async def retrieve_blob(self, blob_id: str) -> bytes: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.retrieve_blob(blob_id) - - async def close(self): - # todo. Do we have a close method? 
- pass - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() diff --git a/src/ell/api/client/sqlite.py b/src/ell/api/client/sqlite.py deleted file mode 100644 index ed1393acb..000000000 --- a/src/ell/api/client/sqlite.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import List, Optional, Dict, Any - -from ell.api.client.abc import EllClient -from ell.stores.sql import SQLiteStore -from ell.stores.studio import SerializedLMP, Invocation -from ell.types.serialize import WriteLMPInput, WriteInvocationInput, LMP - - -class EllSqliteClient(EllClient): - def __init__(self, storage_dir: str): - self.store = SQLiteStore(storage_dir) - - async def get_lmp(self, lmp_id: str): - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - serialized_lmp = SerializedLMP(**lmp.model_dump()) - self.store.write_lmp(serialized_lmp, uses) - - async def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) - self.store.write_invocation( - invocation, - set(input.consumes) - ) - return None - - async def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.store_blob(blob, metadata) - - async def retrieve_blob(self, blob_id: str) -> bytes: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.retrieve_blob(blob_id) - - async def close(self): - # SQLiteStore doesn't have a close method, so this is a no-op - pass - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() - diff --git 
a/src/ell/api/config.py b/src/ell/api/config.py index db121b585..6d95d8845 100644 --- a/src/ell/api/config.py +++ b/src/ell/api/config.py @@ -1,5 +1,4 @@ # todo. move this under ell.api.server -from functools import lru_cache import json import os from typing import Any, Optional @@ -10,17 +9,14 @@ logger = logging.getLogger(__name__) -# todo. maybe we default storage dir and other things in the future to a well-known location -# like ~/.ell or something -@lru_cache(maxsize=1) -def ell_home() -> str: - return os.path.join(os.path.expanduser("~"), ".ell") - - class Config(BaseModel): storage_dir: Optional[str] = None pg_connection_string: Optional[str] = None mqtt_connection_string: Optional[str] = None + minio_endpoint: Optional[str] = None + minio_access_key: Optional[str] = None + minio_secret_key: Optional[str] = None + minio_bucket: Optional[str] = None log_level: int = logging.INFO def __init__(self, **kwargs: Any): @@ -28,10 +24,6 @@ def __init__(self, **kwargs: Any): def model_post_init(self, __context: Any): # Storage - self.pg_connection_string = self.pg_connection_string or os.getenv( - "ELL_PG_CONNECTION_STRING") - self.storage_dir = self.storage_dir or os.getenv("ELL_STORAGE_DIR") - # Enforce that we use either sqlite or postgres, but not both if self.pg_connection_string is not None and self.storage_dir is not None: raise ValueError("Cannot use both sqlite and postgres") @@ -42,8 +34,5 @@ def model_post_init(self, __context: Any): # todo. better default? 
self.storage_dir = os.getcwd() - # Pubsub - self.mqtt_connection_string = self.mqtt_connection_string or os.getenv("ELL_MQTT_CONNECTION_STRING") - - logger.info(f"Resolved config: {json.dumps(self.model_dump(), indent=2)}") + logger.info(f"Resolved config: {json.dumps(self.model_dump(exclude_none=True), indent=2)}") diff --git a/src/ell/api/server.py b/src/ell/api/server.py index c74a106d7..84c6bb019 100644 --- a/src/ell/api/server.py +++ b/src/ell/api/server.py @@ -7,10 +7,12 @@ from fastapi import Depends, FastAPI, HTTPException -from ell.api.client.abc import EllClient from ell.api.config import Config from ell.api.pubsub.abc import PubSub -from ell.types.serialize import GetLMPResponse, LMPInvokedEvent, WriteInvocationInput, WriteLMPInput, LMP +from ell.serialize.client import get_async_serializer +from ell.serialize.config import SerializeConfig +from ell.serialize.protocol import EllAsyncSerializer +from ell.types.serialize import GetLMPOutput, LMPInvokedEvent, WriteInvocationInput, WriteLMPInput, LMP, WriteBlobInput from ell.util.errors import missing_ell_extras logger = logging.getLogger(__name__) @@ -43,32 +45,18 @@ async def init_pubsub(config: Config, exit_stack: AsyncExitStack): -serializer: Optional[EllClient] = None +serializer: Optional[EllAsyncSerializer] = None -def init_serializer(config: Config) -> EllClient: +def init_serializer(config: Config) -> EllAsyncSerializer: global serializer if serializer is not None: return serializer - elif config.pg_connection_string: - try: - from ell.api.client.postgres import EllPostgresClient - return EllPostgresClient(config.pg_connection_string) - except ImportError: - raise missing_ell_extras( - message="Postgres storage is not enabled.", extras=["postgres"] - ) - elif config.storage_dir: - try: - from ell.api.client.sqlite import EllSqliteClient - return EllSqliteClient(config.storage_dir) - except ImportError: - raise missing_ell_extras( - message="SQLite storage is not enabled.", extras=["sqlite"] - ) - - 
else: - raise ValueError("No storage configuration found") + serializer = get_async_serializer(config=SerializeConfig( + **config.model_dump() + )) + + return serializer def get_serializer(): @@ -107,8 +95,8 @@ async def lifespan(app: FastAPI): pubsub = None app = FastAPI( - title="ELL API", - description="Ell API Server", + title="ell api", + description="ell api server", version="0.1.0", lifespan=lifespan ) @@ -116,12 +104,12 @@ async def lifespan(app: FastAPI): @app.get("/lmp/versions", response_model=List[LMP]) async def get_lmp_versions( fqn: str, - serializer: EllClient = Depends(get_serializer)): - return serializer.get_lmp_versions(fqn) + serializer: EllAsyncSerializer = Depends(get_serializer)): + return await serializer.get_lmp_versions(fqn) - @app.get("/lmp/{lmp_id}", response_model=GetLMPResponse) + @app.get("/lmp/{lmp_id}", response_model=GetLMPOutput) async def get_lmp(lmp_id: str, - serializer: EllClient = Depends(get_serializer), + serializer: EllAsyncSerializer = Depends(get_serializer), # todo. figure out the ramifications of doing this here # session: Session = Depends(get_session) ): @@ -136,7 +124,7 @@ async def write_lmp( # fixme. what is this type supposed to be? 
uses: List[str], # SerializedLMPUses, pubsub: PubSub = Depends(get_pubsub), - serializer: EllClient = Depends(get_serializer) + serializer: EllAsyncSerializer = Depends(get_serializer) ): await serializer.write_lmp(lmp, uses) @@ -156,7 +144,7 @@ async def write_lmp( async def write_invocation( input: WriteInvocationInput, pubsub: PubSub = Depends(get_pubsub), - serializer: EllClient = Depends(get_serializer) + serializer: EllAsyncSerializer = Depends(get_serializer) ): logger.info(f"Writing invocation {input.invocation.lmp_id}") # TODO: return anything this might create like invocation id @@ -178,4 +166,14 @@ async def write_invocation( return input + @app.post("/blob") + async def store_blob( + input: WriteBlobInput, + serializer: EllAsyncSerializer = Depends(get_serializer) + ): + if not serializer.supports_blobs: + raise HTTPException(status_code=400, detail="Blob support is not enabled.") + return await serializer.store_blob(**input.model_dump()) + + return app diff --git a/src/ell/api/client/__init__.py b/src/ell/serialize/__init__.py similarity index 100% rename from src/ell/api/client/__init__.py rename to src/ell/serialize/__init__.py diff --git a/src/ell/serialize/client.py b/src/ell/serialize/client.py new file mode 100644 index 000000000..7adca6773 --- /dev/null +++ b/src/ell/serialize/client.py @@ -0,0 +1,73 @@ +from typing import Optional + +from ell.serialize.protocol import EllSerializer, EllAsyncSerializer +from ell.stores.store import AsyncBlobStore, BlobStore +from ell.serialize.config import SerializeConfig +from ell.util.errors import missing_ell_extras + + + +def get_blob_store(config: SerializeConfig) -> Optional[BlobStore]: + if config.minio_endpoint is not None: + try: + from ell.stores.minio import MinioBlobStore, MinioConfig + minio_config = MinioConfig( + endpoint=config.minio_endpoint, + access_key=config.minio_access_key, # type: ignore + secret_key=config.minio_secret_key, # type: ignore + bucket=config.minio_bucket # type: ignore + ) 
+ return MinioBlobStore(minio_config) + except ImportError: + raise missing_ell_extras(message="MinIO storage is not enabled.", extras=["minio"]) + return None + + +def get_serializer(config: SerializeConfig) -> EllSerializer: + blob_store = get_blob_store(config) + if config.pg_connection_string: + try: + from ell.serialize.postgres import PostgresSerializer + return PostgresSerializer(config.pg_connection_string, blob_store) # type: ignore + except ImportError: + raise missing_ell_extras(message="Postgres storage is not enabled.", extras=["postgres"]) + if config.storage_dir: + try: + from ell.serialize.sqlite import SQLiteSerializer + return SQLiteSerializer(config.storage_dir, blob_store) + except ImportError: + raise missing_ell_extras(message="SQLite storage is not enabled.", extras=["sqlite"]) + raise ValueError("No storage configuration found.") + + +def get_async_blob_store(config: SerializeConfig) -> Optional[AsyncBlobStore]: + if config.minio_endpoint is not None: + try: + from ell.stores.minio import AsyncMinioBlobStore, MinioConfig + minio_config = MinioConfig( + endpoint=config.minio_endpoint, + access_key=config.minio_access_key, # type: ignore + secret_key=config.minio_secret_key, # type: ignore + bucket=config.minio_bucket # type: ignore + ) + return AsyncMinioBlobStore(minio_config) + except ImportError: + raise missing_ell_extras(message="MinIO storage is not enabled.", extras=["minio"]) + return None + + +def get_async_serializer(config: SerializeConfig) -> EllAsyncSerializer: + blob_store = get_async_blob_store(config) + if config.pg_connection_string: + try: + from ell.serialize.postgres import AsyncPostgresSerializer + return AsyncPostgresSerializer(config.pg_connection_string, blob_store) + except ImportError: + raise missing_ell_extras(message="Postgres storage is not enabled.", extras=["postgres"]) + if config.storage_dir: + try: + from ell.serialize.sqlite import AsyncSQLiteSerializer + return AsyncSQLiteSerializer(config.storage_dir, 
blob_store) + except ImportError: + raise missing_ell_extras(message="SQLite storage is not enabled.", extras=["sqlite"]) + raise ValueError("No storage configuration found.") diff --git a/src/ell/serialize/config.py b/src/ell/serialize/config.py new file mode 100644 index 000000000..c88bac6f8 --- /dev/null +++ b/src/ell/serialize/config.py @@ -0,0 +1,37 @@ +import json +import os +from typing import Any, Optional +from pydantic import BaseModel + +import logging + +logger = logging.getLogger(__name__) + + +class SerializeConfig(BaseModel): + storage_dir: Optional[str] = None + pg_connection_string: Optional[str] = None + mqtt_connection_string: Optional[str] = None + minio_endpoint: Optional[str] = None + minio_access_key: Optional[str] = None + minio_secret_key: Optional[str] = None + minio_bucket: Optional[str] = None + log_level: int = logging.INFO + + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + + def model_post_init(self, __context: Any): + # Enforce that we use either sqlite or postgres, but not both + if self.pg_connection_string is not None and self.storage_dir is not None: + raise ValueError("Cannot use both sqlite and postgres") + + # Fall back to sqlite if no PostgreSQL connection string is provided + if self.pg_connection_string is None and self.storage_dir is None: + # This intends to honor the default we had set in the CLI + # todo. better default? 
+ self.storage_dir = os.getcwd() + + logger.info(f"Resolved config: {json.dumps(self.model_dump(exclude_none=True), indent=2)}") + diff --git a/src/ell/serialize/http.py b/src/ell/serialize/http.py new file mode 100644 index 000000000..2b416be62 --- /dev/null +++ b/src/ell/serialize/http.py @@ -0,0 +1,134 @@ +import logging +from typing import List, Optional, Dict, Any + +import httpx +from httpx import HTTPStatusError + +from ell.serialize.protocol import EllAsyncSerializer, EllSerializer +from ell.types.serialize import GetLMPOutput, WriteLMPInput, LMP, WriteInvocationInput + + +class EllHTTPSerializer(EllSerializer): + def __init__(self, base_url: str): + self.base_url = base_url + self.client = httpx.Client(base_url=base_url) + self.supports_blobs = True # we assume the server does, if not will find out later + + def get_lmp(self, lmp_id: str) -> GetLMPOutput: + response = self.client.get(f"/lmp/{lmp_id}") + response.raise_for_status() + data = response.json() + if data is None: + return None + return LMP(**data) + + def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + try: + response = self.client.post("/lmp", json={ + "lmp": lmp.model_dump(mode="json"), + "uses": uses + }) + response.raise_for_status() + except HTTPStatusError as e: + if e.response.status_code == 422: + error_detail = e.response.json().get("detail", "No detailed error message provided") + logging.error(f"Unprocessable Entity (422) Error: {error_detail}") + raise ValueError(f"Invalid input: {error_detail}") from e + raise + + def write_invocation(self, input: WriteInvocationInput) -> None: + response = self.client.post( + "/invocation", + json=input.model_dump(mode="json") + ) + response.raise_for_status() + return None + + def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + response = self.client.post("/blob", data={ + "blob_id": blob_id, + "blob": blob, + "metadata": metadata + }) + response.raise_for_status() + return 
response.json()["blob_id"] + + def retrieve_blob(self, blob_id: str) -> bytes: + response = self.client.get(f"/blob/{blob_id}") + response.raise_for_status() + return response.content + + def close(self): + self.client.close() + + def get_lmp_versions(self, fqn: str) -> List[LMP]: + response = self.client.get("/lmp/versions", params={"fqn": fqn}) + response.raise_for_status() + data = response.json() + return [LMP(**lmp_data) for lmp_data in data] + + +class EllAsyncHTTPSerializer(EllAsyncSerializer): + def __init__(self, base_url: str): + self.base_url = base_url + self.client = httpx.AsyncClient(base_url=base_url) + self.supports_blobs = True # we assume the server does, if not will find out later + + async def get_lmp(self, lmp_id: str) -> GetLMPOutput: + response = await self.client.get(f"/lmp/{lmp_id}") + response.raise_for_status() + data = response.json() + if data is None: + return None + return LMP(**data) + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + try: + response = await self.client.post("/lmp", json={ + "lmp": lmp.model_dump(mode="json"), + "uses": uses + }) + response.raise_for_status() + except HTTPStatusError as e: + if e.response.status_code == 422: + error_detail = e.response.json().get("detail", "No detailed error message provided") + logging.error(f"Unprocessable Entity (422) Error: {error_detail}") + raise ValueError(f"Invalid input: {error_detail}") from e + raise + + async def write_invocation(self, input: WriteInvocationInput) -> None: + response = await self.client.post( + "/invocation", + json=input.model_dump(mode="json") + ) + response.raise_for_status() + return None + + async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + response = await self.client.post("/blob", data={ + "blob_id": blob_id, + "blob": blob, + "metadata": metadata + }) + response.raise_for_status() + return response.json()["blob_id"] + + async def retrieve_blob(self, blob_id: str) -> 
bytes: + response = await self.client.get(f"/blob/{blob_id}") + response.raise_for_status() + return response.content + + async def close(self): + await self.client.aclose() + + async def __aenter__(self): + return self + + async def __aexit__(self): + await self.close() + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + response = await self.client.get("/lmp/versions", params={"fqn": fqn}) + response.raise_for_status() + data = response.json() + return [LMP(**lmp_data) for lmp_data in data] diff --git a/src/ell/serialize/postgres.py b/src/ell/serialize/postgres.py new file mode 100644 index 000000000..9fe03634f --- /dev/null +++ b/src/ell/serialize/postgres.py @@ -0,0 +1,95 @@ +from typing import List, Optional, Dict, Any + +from ell.stores.sql import PostgresStore +from ell.stores.store import BlobStore, AsyncBlobStore +from ell.stores.studio import Invocation, SerializedLMP +from ell.types.serialize import LMP, WriteLMPInput, WriteInvocationInput +from ell.serialize.protocol import EllSerializer, EllAsyncSerializer + + +class PostgresSerializer(EllSerializer): + def __init__(self, db_uri: str, blob_store: Optional[BlobStore] = None): + self.store = PostgresStore(db_uri, blob_store) + self.supports_blobs = blob_store is not None + + def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + model = SerializedLMP.from_api(lmp) + self.store.write_lmp(model, uses) + + def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + self.store.write_invocation(invocation, set(input.consumes)) + return None + + def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if 
self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.store_blob(blob=blob, blob_id=blob_id) + + def retrieve_blob(self, blob_id: str) -> bytes: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.retrieve_blob(blob_id) + + def close(self): + pass + + +# todo(async): the underlying store is not async-aware +class AsyncPostgresSerializer(EllAsyncSerializer): + def __init__(self, db_uri: str, blob_store: Optional[AsyncBlobStore] = None): + self.store = PostgresStore(db_uri, blob_store) + self.blob_store = blob_store + self.supports_blobs = blob_store is not None + + async def get_lmp(self, lmp_id: str) -> Optional[LMP]: + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + model = SerializedLMP.from_api(lmp) + self.store.write_lmp(model, uses) + + async def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + self.store.write_invocation( + invocation, + set(input.consumes) + ) + return None + + async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if self.blob_store is None: + raise ValueError("Blob store is not enabled") + return await self.blob_store.store_blob(blob=blob, blob_id=blob_id) + + async def retrieve_blob(self, blob_id: str) -> bytes: + if self.blob_store is None: + raise ValueError("Blob store is not enabled") + return await self.blob_store.retrieve_blob(blob_id) + + async def close(self): + # todo. Do we have a close method? 
+ pass + + async def __aenter__(self): + return self + + async def __aexit__(self): + await self.close() diff --git a/src/ell/serialize/protocol.py b/src/ell/serialize/protocol.py new file mode 100644 index 000000000..0df700aea --- /dev/null +++ b/src/ell/serialize/protocol.py @@ -0,0 +1,53 @@ +from typing import Protocol, Optional, List, Dict, Any + +from ell.types.serialize import GetLMPOutput, WriteLMPInput, WriteInvocationInput, LMP + + +class EllSerializer(Protocol): + supports_blobs: bool + + def get_lmp(self, lmp_id: str) -> GetLMPOutput: + ... + + def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + ... + + def write_invocation(self, input: WriteInvocationInput) -> None: + ... + + def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + ... + + def retrieve_blob(self, blob_id: str) -> bytes: + ... + + def close(self): + ... + + def get_lmp_versions(self, fqn: str) -> List[LMP]: + ... + + +class EllAsyncSerializer(Protocol): + supports_blobs: bool + + async def get_lmp(self, lmp_id: str) -> GetLMPOutput: + ... + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + ... + + async def write_invocation(self, input: WriteInvocationInput) -> None: + ... + + async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + ... + + async def retrieve_blob(self, blob_id: str) -> bytes: + ... + + async def close(self): + ... + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + ... 
diff --git a/src/ell/serialize/sqlite.py b/src/ell/serialize/sqlite.py new file mode 100644 index 000000000..b005899db --- /dev/null +++ b/src/ell/serialize/sqlite.py @@ -0,0 +1,86 @@ +from typing import List, Optional, Dict, Any + +from ell.serialize.protocol import EllSerializer, EllAsyncSerializer +from ell.stores.sql import SQLiteStore +from ell.stores.store import AsyncBlobStore, BlobStore +from ell.stores.studio import SerializedLMP, Invocation +from ell.types.serialize import WriteLMPInput, WriteInvocationInput, LMP + + + +class SQLiteSerializer(EllSerializer): + def __init__(self, storage_dir: str, blob_store: Optional[BlobStore] = None): + self.store = SQLiteStore(storage_dir, blob_store) + self.supports_blobs = True + + def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + serialized_lmp = SerializedLMP.from_api(lmp) + self.store.write_lmp(serialized_lmp, uses) + + def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + self.store.write_invocation(invocation, set(input.consumes)) + return None + + def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + return self.store.blob_store.store_blob(blob, blob_id) # type: ignore + + def retrieve_blob(self, blob_id: str) -> bytes: + return self.store.blob_store.retrieve_blob(blob_id) # type: ignore + + def close(self): + pass + + + +# todo(async). 
underlying store is not async-aware +class AsyncSQLiteSerializer(EllAsyncSerializer): + def __init__(self, storage_dir: str, blob_store: Optional[AsyncBlobStore] = None): + self.store = SQLiteStore(storage_dir, blob_store) + self.blob_store = blob_store + self.supports_blobs = True + + async def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + serialized_lmp = SerializedLMP.from_api(lmp) + self.store.write_lmp(serialized_lmp, uses) + + async def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + self.store.write_invocation(invocation, set(input.consumes)) + return None + + async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + return await self.blob_store.store_blob(blob, blob_id) # type: ignore + + async def retrieve_blob(self, blob_id: str) -> bytes: + return await self.blob_store.retrieve_blob(blob_id) # type: ignore + + async def close(self): + pass + + async def __aenter__(self): + return self + + async def __aexit__(self): + await self.close() + diff --git a/src/ell/stores/__init__.py b/src/ell/stores/__init__.py index 45ec0bd33..9ac77a283 100644 --- a/src/ell/stores/__init__.py +++ b/src/ell/stores/__init__.py @@ -1,4 +1,6 @@ try: + # TODO. this will actually be ok once we have stores that do not require sqlmodel, so we may not want to rely on it now + # or have a stores.sql module later import sqlmodel except ImportError: raise ImportError("ell.stores has missing dependencies. Install them with `pip install -U ell-ai[sqlite]` or `pip install -U ell-ai[postgres]`. 
More info: https://docs.ell.so/installation/custom-installation") diff --git a/src/ell/stores/minio.py b/src/ell/stores/minio.py new file mode 100644 index 000000000..525a055cc --- /dev/null +++ b/src/ell/stores/minio.py @@ -0,0 +1,55 @@ +import io + +from pydantic import BaseModel, Field +import ell.stores.store + +import minio + + +class MinioConfig(BaseModel): + endpoint: str = Field(description="The endpoint of the minio server") + access_key: str = Field(description="The access key of the minio server") + secret_key: str = Field(description="The secret key of the minio server") + bucket: str = Field(description="The bucket to store the blobs in") + + +class MinioBlobStore(ell.stores.store.BlobStore): + def __init__(self, config: MinioConfig): + self.config = config + self.client = minio.Minio( + #todo. support tls with dev vs prod + secure=False,#False if config.endpoint.startswith("localhost") else True, + endpoint=config.endpoint, + access_key=config.access_key, + secret_key=config.secret_key) + + def store_blob(self, blob: bytes, blob_id: str, **kwargs): + self.client.put_object( + bucket_name=self.config.bucket, + object_name=blob_id, + data=io.BytesIO(blob), + length=len(blob) + ) + return blob_id + + def retrieve_blob(self, blob_id: str) -> bytes: + return self.client.get_object(self.config.bucket, blob_id).read() + +# todo. 
make this actually async +class AsyncMinioBlobStore(ell.stores.store.AsyncBlobStore): + def __init__(self, config: MinioConfig): + self.config = config + self.client = minio.Minio( + config.endpoint, config.access_key, config.secret_key) + + async def store_blob(self, blob: bytes, blob_id: str, **kwargs): + self.client.put_object( + bucket_name=self.config.bucket, + object_name=blob_id, + data=io.BytesIO(blob), + length=len(blob) + ) + return blob_id + + async def retrieve_blob(self, blob_id: str) -> bytes: + return self.client.get_object(self.config.bucket, blob_id).read() diff --git a/src/ell/stores/sql.py b/src/ell/stores/sql.py index f9e8478cf..3f27baca7 100644 --- a/src/ell/stores/sql.py +++ b/src/ell/stores/sql.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime, timedelta import os from typing import Any, Optional, Dict, List, Set @@ -10,6 +11,8 @@ import gzip import json +logger = logging.getLogger(__name__) + class SQLStore(ell.stores.store.Store): def __init__(self, db_uri: str = None, blob_store: Optional[ell.stores.store.BlobStore] = None, engine: Optional[Engine] = None, ): @@ -216,7 +219,7 @@ def get_invocations_aggregate(self, session: Session, lmp_filters: Dict[str, Any } class SQLiteStore(SQLStore): - def __init__(self, db_dir: str): + def __init__(self, db_dir: str, blob_store: Optional[ell.stores.store.BlobStore] = None): assert not db_dir.endswith('.db'), "Create store with a directory not a db." 
if ":memory:" in db_dir: from sqlalchemy.pool import StaticPool @@ -238,7 +241,7 @@ def __init__(self, db_dir: str): os.makedirs(db_dir, exist_ok=True) self.db_dir = db_dir db_path = os.path.join(db_dir, 'ell.db') - blob_store = SQLBlobStore(db_dir) + blob_store = SQLBlobStore(db_dir) if blob_store is None else blob_store super().__init__(f'sqlite:///{db_path}', blob_store=blob_store) class SQLBlobStore(ell.stores.store.BlobStore): @@ -266,6 +269,7 @@ def _get_blob_path(self, id: str, depth: int = 2) -> str: return os.path.join(self.db_dir, *dirs, file_name) class PostgresStore(SQLStore): - def __init__(self, db_uri: str): - super().__init__(db_uri) + def __init__(self, db_uri: str, blob_store: Optional[ell.stores.store.BlobStore] = None): + super().__init__(db_uri, blob_store) + logger.debug("Postgres store initialized") diff --git a/src/ell/stores/store.py b/src/ell/stores/store.py index 8390b9804..3ebd1956d 100644 --- a/src/ell/stores/store.py +++ b/src/ell/stores/store.py @@ -15,7 +15,7 @@ class BlobStore(ABC): @abstractmethod - def store_blob(self, blob: bytes, blob_id : str) -> str: + def store_blob(self, blob: bytes, blob_id : str, metadata: Optional[Dict[str, Any]] = None) -> str: """Store a blob and return its identifier.""" pass @@ -24,6 +24,18 @@ def retrieve_blob(self, blob_id: str) -> bytes: """Retrieve a blob by its identifier.""" pass + +class AsyncBlobStore(BlobStore): + @abstractmethod + async def store_blob(self, blob: bytes, blob_id: str, metadata: Optional[Dict[str, Any]] = None) -> str: + """Store a blob and return its identifier.""" + pass + + @abstractmethod + async def retrieve_blob(self, blob_id: str) -> bytes: + """Retrieve a blob by its identifier.""" + pass + class Store(ABC): """ Abstract base class for serializers. Defines the interface for serializing and deserializing LMPs and invocations. 
diff --git a/src/ell/studio/__main__.py b/src/ell/studio/__main__.py index 59da2f9cc..3206145c8 100644 --- a/src/ell/studio/__main__.py +++ b/src/ell/studio/__main__.py @@ -1,5 +1,6 @@ import asyncio import logging +import os import socket import time import webbrowser @@ -32,14 +33,33 @@ def _setup_logging(level): def main(): parser = ArgumentParser(description="ell studio") - parser.add_argument("--storage-dir" , default=None, - help="Directory for filesystem serializer storage (default: current directory)") - parser.add_argument("--pg-connection-string", default=None, - help="PostgreSQL connection string (default: None)") - parser.add_argument("--mqtt-connection-string", default=None, - help="MQTT connection string (default: None)") - parser.add_argument("--host", default="0.0.0.0", help="Host to run the server on (default: 0.0.0.0)") - parser.add_argument("--port", type=int, default=5555, help="Port to run the server on (default: 5555)") + parser.add_argument("--storage-dir" , + default=os.getenv("ELL_STORAGE_DIR"), + help="Directory for filesystem serialize storage (default: None, env: ELL_STORAGE_DIR)") + parser.add_argument("--pg-connection-string", + default=os.getenv("ELL_PG_CONNECTION_STRING"), + help="PostgreSQL connection string (default: None, env: ELL_PG_CONNECTION_STRING)") + parser.add_argument("--mqtt-connection-string", + default=os.getenv("ELL_MQTT_CONNECTION_STRING"), + help="MQTT connection string (default: None, env: ELL_MQTT_CONNECTION_STRING)") + parser.add_argument("--minio-endpoint", + default=os.getenv("ELL_MINIO_ENDPOINT"), + help="MinIO endpoint (default: None, env: ELL_MINIO_ENDPOINT)") + parser.add_argument("--minio-access-key", + default=os.getenv("ELL_MINIO_ACCESS_KEY"), + help="MinIO access key (default: None, env: ELL_MINIO_ACCESS_KEY)") + parser.add_argument("--minio-secret-key", + default=os.getenv("ELL_MINIO_SECRET_KEY"), + help="MinIO secret key (default: None, env: ELL_MINIO_SECRET_KEY)") + 
parser.add_argument("--minio-bucket", default=os.getenv("ELL_MINIO_BUCKET"), + help="MinIO bucket (default: None, env: ELL_MINIO_BUCKET)") + parser.add_argument("--host", + default=os.getenv("ELL_STUDIO_HOST") or "0.0.0.0", + help="Host to run the server on (default: 0.0.0.0, env: ELL_STUDIO_HOST)") + parser.add_argument("--port", + type=int, + default=int(os.getenv("ELL_STUDIO_PORT") or 5555), + help="Port to run the server on (default: 5555, env: ELL_STUDIO_PORT)") parser.add_argument("--dev", action="store_true", help="Run in development mode") parser.add_argument("--dev-static-dir", default=None, help="Directory to serve static files from in development mode") parser.add_argument("--open", action="store_true", help="Opens the studio web UI in a browser") @@ -49,11 +69,17 @@ def main(): _setup_logging(logging.DEBUG if args.verbose else logging.INFO) if args.dev: - assert args.port == 5555, "Port must be 5000 in development mode" - - config = Config.create(storage_dir=args.storage_dir, - pg_connection_string=args.pg_connection_string, - mqtt_connection_string=args.mqtt_connection_string) + assert args.port == 5555, "Port must be 5555 in development mode" + + config = Config.create( + storage_dir=args.storage_dir, + pg_connection_string=args.pg_connection_string, + mqtt_connection_string=args.mqtt_connection_string, + minio_endpoint=args.minio_endpoint, + minio_access_key=args.minio_access_key, + minio_secret_key=args.minio_secret_key, + minio_bucket=args.minio_bucket + ) app = create_app(config) if not args.dev: diff --git a/src/ell/studio/config.py b/src/ell/studio/config.py index b252732f5..b54fce265 100644 --- a/src/ell/studio/config.py +++ b/src/ell/studio/config.py @@ -19,6 +19,10 @@ class Config(BaseModel): pg_connection_string: Optional[str] = None storage_dir: Optional[str] = None mqtt_connection_string: Optional[str] = None + minio_endpoint: Optional[str] = None + minio_access_key: Optional[str] = None + minio_secret_key: Optional[str] = None + 
minio_bucket: Optional[str] = None @classmethod def create( @@ -26,10 +30,11 @@ def create( storage_dir: Optional[str] = None, pg_connection_string: Optional[str] = None, mqtt_connection_string: Optional[str] = None, + minio_endpoint: Optional[str] = None, + minio_access_key: Optional[str] = None, + minio_secret_key: Optional[str] = None, + minio_bucket: Optional[str] = None, ) -> 'Config': - pg_connection_string = pg_connection_string or os.getenv("ELL_PG_CONNECTION_STRING") - storage_dir = storage_dir or os.getenv("ELL_STORAGE_DIR") - # Enforce that we use either sqlite or postgres, but not both if pg_connection_string is not None and storage_dir is not None: raise ValueError("Cannot use both sqlite and postgres") @@ -39,6 +44,12 @@ def create( # This intends to honor the default we had set in the CLI storage_dir = os.getcwd() - mqtt_connection_string = mqtt_connection_string or os.getenv("ELL_MQTT_CONNECTION_STRING") - - return cls(pg_connection_string=pg_connection_string, storage_dir=storage_dir, mqtt_connection_string=mqtt_connection_string) \ No newline at end of file + return cls( + pg_connection_string=pg_connection_string, + storage_dir=storage_dir, + mqtt_connection_string=mqtt_connection_string, + minio_endpoint=minio_endpoint, + minio_access_key=minio_access_key, + minio_secret_key=minio_secret_key, + minio_bucket=minio_bucket + ) diff --git a/src/ell/studio/server.py b/src/ell/studio/server.py index 725773ad5..0fdecc4f1 100644 --- a/src/ell/studio/server.py +++ b/src/ell/studio/server.py @@ -3,6 +3,9 @@ from typing import Optional, Dict, Any from sqlmodel import Session + +from ell.serialize.client import get_serializer, get_blob_store +from ell.serialize.config import SerializeConfig from ell.stores.sql import PostgresStore, SQLiteStore from ell import __version__ from fastapi import FastAPI, Query, HTTPException, Depends, Response, WebSocket, WebSocketDisconnect @@ -28,10 +31,12 @@ def get_serializer(config: Config): + serialize_config = 
SerializeConfig(**config.model_dump()) + blob_store = get_blob_store(serialize_config) if config.pg_connection_string: - return PostgresStore(config.pg_connection_string) + return PostgresStore(config.pg_connection_string, blob_store) elif config.storage_dir: - return SQLiteStore(config.storage_dir) + return SQLiteStore(config.storage_dir, blob_store) else: raise ValueError("No storage configuration found") diff --git a/src/ell/types/serialize.py b/src/ell/types/serialize.py index af6a1357e..118db918e 100644 --- a/src/ell/types/serialize.py +++ b/src/ell/types/serialize.py @@ -29,12 +29,14 @@ class WriteLMPInput(BaseModel): api_params: Optional[Dict[str, Any]] = None initial_free_vars: Optional[Dict[str, Any]] = None initial_global_vars: Optional[Dict[str, Any]] = None + # TODO. dict or list? + # uses: List[str] = Field(default_factory=list) # this is omitted so as to not confuse whether the number should be incremented (should always happen at the db level) # num_invocations: Optional[int] = None commit_message: Optional[str] = None version_number: Optional[int] = None - created_at: Optional[AwareDatetime] = Field(default_factory=utc_now) + created_at: Optional[AwareDatetime] = Field(default_factory=utc_now) class LMP(BaseModel): @@ -52,7 +54,10 @@ class LMP(BaseModel): num_invocations: int -GetLMPResponse = Optional[LMP] +class GetLMPInput(BaseModel): + id: str + +GetLMPOutput = Optional[LMP] InvocationResults = Union[List[Message], Any] @@ -79,7 +84,9 @@ def total_size_bytes(self) -> int: self.global_vars, self.free_vars ] - return sum(len(json.dumps(field, default=(lambda x: x.model_dump_json() if isinstance(x, BaseModel) else str(x))).encode('utf-8')) for field in json_fields if field is not None) + return sum(len(json.dumps(field, default=( + lambda x: x.model_dump_json() if isinstance(x, BaseModel) else str(x))).encode('utf-8')) for field in + json_fields if field is not None) @cached_property def should_externalize(self) -> bool: @@ -113,3 +120,26 @@ class 
LMPInvokedEvent(BaseModel): lmp_id: str # invocation_id: str consumes: List[str] + + +class WriteBlobInput(BaseModel): + """ + Arguments to write a blob to a blob store + """ + blob_id: str + blob: bytes + metadata: Optional[Dict[str, Any]] = None + + + +# class Blob(BaseModel): +# blob_id: str +# blob: bytes +# content_type: str +# metadata: Optional[Dict[str, Any]] = None +# +# @cached_property +# def size_bytes(self) -> int: +# return len(self.blob) +# +# diff --git a/tests/api/test_api.py b/tests/api/test_api.py index fc04dd924..fa80929a5 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -5,7 +5,7 @@ from typing import Any, Dict from fastapi.testclient import TestClient -from ell.api.client.sqlite import EllSqliteClient +from ell.serialize.sqlite import SQLiteSerializer from ell.api.server import create_app, get_pubsub, get_serializer from ell.api.config import Config from ell.api.logger import setup_logging @@ -16,8 +16,8 @@ @pytest.fixture -def sql_store() -> EllSqliteClient: - return EllSqliteClient(":memory:") +def sql_store() -> SQLiteSerializer: + return SQLiteSerializer(":memory:") def test_construct_serialized_lmp(): @@ -87,7 +87,7 @@ def test_write_lmp_input(): assert input2.created_at.tzinfo == timezone.utc -def create_test_app(sql_store: EllSqliteClient): +def create_test_app(sql_store: SQLiteSerializer): setup_logging(DEBUG) config = Config(storage_dir=":memory:") app = create_app(config) @@ -109,7 +109,7 @@ def get_serializer_override(): return app, client, publisher, config -def test_write_lmp(sql_store: EllSqliteClient): +def test_write_lmp(sql_store: SQLiteSerializer): _app, client, *_ = create_test_app(sql_store) # fime. 
figure out what's going on with `uses` @@ -145,7 +145,7 @@ def test_write_lmp(sql_store: EllSqliteClient): assert lmp.json() == {**lmp_data, "num_invocations": 0} -def test_write_invocation(sql_store: EllSqliteClient): +def test_write_invocation(sql_store: SQLiteSerializer): _app, client, *_ = create_test_app(sql_store) lmp_id = uuid4().hex From 3f39f860b5f357cb587a6c3af3e7528a2c8befc8 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 10 Nov 2024 09:16:27 -0800 Subject: [PATCH 22/40] remove unnecessary cast --- src/ell/stores/studio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ell/stores/studio.py b/src/ell/stores/studio.py index 3c3e7c387..bae888b3a 100644 --- a/src/ell/stores/studio.py +++ b/src/ell/stores/studio.py @@ -103,7 +103,7 @@ def from_api(input: ell.types.serialize.WriteLMPInput): initial_global_vars=input.initial_global_vars, initial_free_vars=input.initial_free_vars, commit_message=input.commit_message, - created_at=cast(datetime, input.created_at) + created_at=input.created_at ) From e39cb6d0f62aac2737004fd8d2ca45793ce0e761 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 10 Nov 2024 09:24:02 -0800 Subject: [PATCH 23/40] update tests --- tests/api/test_api.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/api/test_api.py b/tests/api/test_api.py index fa80929a5..4ac869aa3 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -5,7 +5,7 @@ from typing import Any, Dict from fastapi.testclient import TestClient -from ell.serialize.sqlite import SQLiteSerializer +from ell.serialize.sqlite import SQLiteSerializer, AsyncSQLiteSerializer from ell.api.server import create_app, get_pubsub, get_serializer from ell.api.config import Config from ell.api.logger import setup_logging @@ -16,9 +16,12 @@ @pytest.fixture -def sql_store() -> SQLiteSerializer: +def sqlite_serializer() -> SQLiteSerializer: return SQLiteSerializer(":memory:") +@pytest.fixture +def 
async_sqlite_serializer() -> AsyncSQLiteSerializer: + return AsyncSQLiteSerializer(":memory:") def test_construct_serialized_lmp(): serialized_lmp = SerializedLMP( @@ -87,7 +90,7 @@ def test_write_lmp_input(): assert input2.created_at.tzinfo == timezone.utc -def create_test_app(sql_store: SQLiteSerializer): +def create_test_app(serializer: AsyncSQLiteSerializer): setup_logging(DEBUG) config = Config(storage_dir=":memory:") app = create_app(config) @@ -99,7 +102,7 @@ async def get_publisher_override(): def get_serializer_override(): - return sql_store + return serializer app.dependency_overrides[get_pubsub] = get_publisher_override app.dependency_overrides[get_serializer] = get_serializer_override @@ -109,8 +112,8 @@ def get_serializer_override(): return app, client, publisher, config -def test_write_lmp(sql_store: SQLiteSerializer): - _app, client, *_ = create_test_app(sql_store) +def test_write_lmp(async_sqlite_serializer: AsyncSQLiteSerializer): + _app, client, *_ = create_test_app(async_sqlite_serializer) # fime. 
figure out what's going on with `uses` lmp_data: Dict[str, Any] = { @@ -145,8 +148,8 @@ def test_write_lmp(sql_store: SQLiteSerializer): assert lmp.json() == {**lmp_data, "num_invocations": 0} -def test_write_invocation(sql_store: SQLiteSerializer): - _app, client, *_ = create_test_app(sql_store) +def test_write_invocation(async_sqlite_serializer: AsyncSQLiteSerializer): + _app, client, *_ = create_test_app(async_sqlite_serializer) lmp_id = uuid4().hex lmp_data: Dict[str, Any] = { From b2d5d8469715d0789859a9a720e84a3aede2cf58 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 10 Nov 2024 09:28:44 -0800 Subject: [PATCH 24/40] rename client to serializer --- src/ell/api/server.py | 2 +- src/ell/serialize/{client.py => serializer.py} | 0 src/ell/studio/server.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/ell/serialize/{client.py => serializer.py} (100%) diff --git a/src/ell/api/server.py b/src/ell/api/server.py index 84c6bb019..1efbd63c9 100644 --- a/src/ell/api/server.py +++ b/src/ell/api/server.py @@ -9,7 +9,7 @@ from ell.api.config import Config from ell.api.pubsub.abc import PubSub -from ell.serialize.client import get_async_serializer +from ell.serialize.serializer import get_async_serializer from ell.serialize.config import SerializeConfig from ell.serialize.protocol import EllAsyncSerializer from ell.types.serialize import GetLMPOutput, LMPInvokedEvent, WriteInvocationInput, WriteLMPInput, LMP, WriteBlobInput diff --git a/src/ell/serialize/client.py b/src/ell/serialize/serializer.py similarity index 100% rename from src/ell/serialize/client.py rename to src/ell/serialize/serializer.py diff --git a/src/ell/studio/server.py b/src/ell/studio/server.py index 0fdecc4f1..f45316d96 100644 --- a/src/ell/studio/server.py +++ b/src/ell/studio/server.py @@ -4,7 +4,7 @@ from sqlmodel import Session -from ell.serialize.client import get_serializer, get_blob_store +from ell.serialize.serializer import get_serializer, get_blob_store from 
ell.serialize.config import SerializeConfig from ell.stores.sql import PostgresStore, SQLiteStore from ell import __version__ From a71c28f93b569b1aae9c5cd47288ce7ce6616079 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 10 Nov 2024 11:32:40 -0800 Subject: [PATCH 25/40] add serializer to core --- src/ell/configurator.py | 34 +++++++++--- src/ell/lmp/_track.py | 23 ++++----- src/ell/serialize/config.py | 20 +++---- src/ell/serialize/protocol.py | 4 +- src/ell/serialize/serializer.py | 7 +++ src/ell/serialize/sql.py | 92 +++++++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+), 32 deletions(-) create mode 100644 src/ell/serialize/sql.py diff --git a/src/ell/configurator.py b/src/ell/configurator.py index ae8aa8c68..90a2b4127 100644 --- a/src/ell/configurator.py +++ b/src/ell/configurator.py @@ -8,6 +8,9 @@ from ell.provider import Provider from dataclasses import dataclass, field +from ell.serialize.config import SerializeConfig +from ell.serialize.protocol import EllSerializer +from ell.serialize.serializer import get_serializer from ell.util.errors import missing_ell_extras if TYPE_CHECKING: @@ -47,6 +50,7 @@ class Config(BaseModel): default_client: Optional[openai.Client] = Field(default=None, description="The default OpenAI client used when a specific model client is not found.") autocommit_model: str = Field(default="gpt-4o-mini", description="When set, changes the default autocommit model from GPT 4o mini.") providers: Dict[Type, Provider] = Field(default_factory=dict, description="A dictionary mapping client types to provider classes.") + serializer: EllSerializer = Field(default=None, description="Serializer used for LMPs and invocations") def __init__(self, **data): super().__init__(**data) self._lock = threading.Lock() @@ -157,7 +161,9 @@ def init( lazy_versioning: bool = True, default_api_params: Optional[Dict[str, Any]] = None, default_client: Optional[Any] = None, - autocommit_model: str = "gpt-4o-mini" + autocommit_model: str = 
"gpt-4o-mini", + api_server_url: Optional[str] = None, + serializer: Optional[EllSerializer] = None, ) -> None: """ Initialize the ELL configuration with various settings. @@ -176,22 +182,38 @@ def init( :type default_openai_client: openai.Client, optional :param autocommit_model: Set the model used for autocommitting. :type autocommit_model: str + :param api_server_url: Ell API server URL + :type api_server_url: str + :param serializer: Ell serializer class. + :type serializer: EllSerializer """ # XXX: prevent double init config.verbose = verbose config.lazy_versioning = lazy_versioning - if isinstance(store, str): + if not isinstance(store, str): try: - from ell.stores.sql import SQLiteStore - config.store = SQLiteStore(store) + from ell.serialize.sql import SQLSerializer + config.serializer = SQLSerializer(store) + config.store = config.serializer.store # legacy except ImportError: raise missing_ell_extras( - message="Failed importing SQLiteStore", + message="Failed importing SQL store dependencies", extras=["all"] ) else: - config.store = store + if serializer is not None: + config.serializer = serializer + else: + serialize_config = SerializeConfig( + api_server_url=api_server_url, + storage_dir=store, + # ...other options + log_level=20 if verbose else 0, + ) + if serialize_config.is_enabled: + config.serializer = get_serializer(serialize_config) + config.autocommit = autocommit or config.autocommit if default_api_params is not None: diff --git a/src/ell/lmp/_track.py b/src/ell/lmp/_track.py index 0e67fffce..75b5b02e4 100644 --- a/src/ell/lmp/_track.py +++ b/src/ell/lmp/_track.py @@ -2,6 +2,7 @@ import logging import threading from ell.types.lmp import LMPType +from ell.types.serialize import Invocation, InvocationContents, WriteInvocationInput, utc_now, WriteLMPInput from ell.util._warnings import _autocommit_warning import ell.util.closure from ell.configurator import config @@ -16,10 +17,6 @@ from ell.util.serialization import compute_state_cache_key from 
ell.util.serialization import prepare_invocation_params -try: - from ell.stores.studio import SerializedLMP, Invocation, InvocationContents, utc_now -except ImportError: - SerializedLMP = Invocation = InvocationContents = utc_now = None logger = logging.getLogger(__name__) @@ -61,7 +58,7 @@ def tracked_func(*fn_args, _get_invocation_id=False, **fn_kwargs) -> str: invocation_id = "invocation-" + secrets.token_hex(16) state_cache_key : str = None - if not config.store: + if not config.serializer: return func_to_track(*fn_args, **fn_kwargs, _invocation_origin=invocation_id)[0] parent_invocation_id = get_current_invocation() @@ -169,7 +166,7 @@ def _serialize_lmp(func): name = func.__qualname__ api_params = getattr(func, "__ell_api_params__", None) - lmps = config.store.get_versions_by_fqn(fqn=name) + lmps = config.serializer.get_lmp_versions(fqn=name) version = 0 already_in_store = any(lmp.lmp_id == func.__ell_hash__ for lmp in lmps) @@ -186,7 +183,7 @@ def _serialize_lmp(func): f"{latest_lmp.dependencies}\n\n{latest_lmp.source}", f"{fn_closure[1]}\n\n{fn_closure[0]}")[0]) - serialized_lmp = SerializedLMP( + serialized_lmp = WriteLMPInput( lmp_id=func.__ell_hash__, name=name, created_at=utc_now(), @@ -199,7 +196,7 @@ def _serialize_lmp(func): api_params=api_params if api_params else None, version_number=version, ) - config.store.write_lmp(serialized_lmp, [f.__ell_hash__ for f in func.__ell_uses__]) + config.serializer.write_lmp(serialized_lmp, [f.__ell_hash__ for f in func.__ell_uses__]) func._has_serialized_lmp = True def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion_tokens, @@ -214,14 +211,14 @@ def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion free_vars=get_immutable_vars(func.__ell_closure__[3]) ) - if invocation_contents.should_externalize and config.store.has_blob_storage: + if invocation_contents.should_externalize and config.serializer.supports_blobs: invocation_contents.is_external = True # Write 
to the blob store - blob_id = config.store.blob_store.store_blob( - json.dumps(invocation_contents.model_dump( + blob_id = config.serializer.store_blob( + blob_id=invocation_id, + blob=json.dumps(invocation_contents.model_dump( ), default=str, ensure_ascii=False).encode('utf-8'), - invocation_id ) invocation_contents = InvocationContents( invocation_id=invocation_id, @@ -240,5 +237,5 @@ def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion contents=invocation_contents ) - config.store.write_invocation(invocation, consumes) + config.serializer.write_invocation(WriteInvocationInput(invocation=invocation, consumes=consumes)) diff --git a/src/ell/serialize/config.py b/src/ell/serialize/config.py index c88bac6f8..8b0df4089 100644 --- a/src/ell/serialize/config.py +++ b/src/ell/serialize/config.py @@ -1,7 +1,6 @@ import json -import os from typing import Any, Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field, computed_field import logging @@ -9,9 +8,9 @@ class SerializeConfig(BaseModel): - storage_dir: Optional[str] = None + storage_dir: Optional[str] = Field(default=None, description="Filesystem path used for SQLite and local blob storage") pg_connection_string: Optional[str] = None - mqtt_connection_string: Optional[str] = None + api_server_endpoint: Optional[str] = Field(default=None, description="Ell API server endpoint") minio_endpoint: Optional[str] = None minio_access_key: Optional[str] = None minio_secret_key: Optional[str] = None @@ -23,15 +22,12 @@ def __init__(self, **kwargs: Any): super().__init__(**kwargs) def model_post_init(self, __context: Any): - # Enforce that we use either sqlite or postgres, but not both + # Enforce that we use 1 storage backend (for now) if self.pg_connection_string is not None and self.storage_dir is not None: raise ValueError("Cannot use both sqlite and postgres") + logger.debug(f"Resolved config: {json.dumps(self.model_dump(exclude_none=True), indent=2)}") - # Fall back to 
sqlite if no PostgreSQL connection string is provided - if self.pg_connection_string is None and self.storage_dir is None: - # This intends to honor the default we had set in the CLI - # todo. better default? - self.storage_dir = os.getcwd() - - logger.info(f"Resolved config: {json.dumps(self.model_dump(exclude_none=True), indent=2)}") + @computed_field + def is_enabled(self) -> bool: + return bool(self.api_server_endpoint or self.pg_connection_string or self.storage_dir or self.minio_endpoint) diff --git a/src/ell/serialize/protocol.py b/src/ell/serialize/protocol.py index 0df700aea..396befc6a 100644 --- a/src/ell/serialize/protocol.py +++ b/src/ell/serialize/protocol.py @@ -1,8 +1,9 @@ -from typing import Protocol, Optional, List, Dict, Any +from typing import Protocol, Optional, List, Dict, Any, runtime_checkable from ell.types.serialize import GetLMPOutput, WriteLMPInput, WriteInvocationInput, LMP +@runtime_checkable class EllSerializer(Protocol): supports_blobs: bool @@ -28,6 +29,7 @@ def get_lmp_versions(self, fqn: str) -> List[LMP]: ... 
+@runtime_checkable class EllAsyncSerializer(Protocol): supports_blobs: bool diff --git a/src/ell/serialize/serializer.py b/src/ell/serialize/serializer.py index 7adca6773..e8fa563df 100644 --- a/src/ell/serialize/serializer.py +++ b/src/ell/serialize/serializer.py @@ -37,6 +37,13 @@ def get_serializer(config: SerializeConfig) -> EllSerializer: return SQLiteSerializer(config.storage_dir, blob_store) except ImportError: raise missing_ell_extras(message="SQLite storage is not enabled.", extras=["sqlite"]) + if config.api_server_endpoint: + try: + from ell.serialize.http import EllHTTPSerializer + return EllHTTPSerializer(config.api_server_endpoint) + except ImportError: + raise missing_ell_extras(message="HTTP serialization is not enabled.", extras=["sqlite"]) + raise ValueError("No storage configuration found.") diff --git a/src/ell/serialize/sql.py b/src/ell/serialize/sql.py new file mode 100644 index 000000000..4ab0fb560 --- /dev/null +++ b/src/ell/serialize/sql.py @@ -0,0 +1,92 @@ +from typing import List, Optional, Dict, Any + +import ell.stores.store +from ell.stores.studio import Invocation, SerializedLMP +from ell.types.serialize import LMP, WriteLMPInput, WriteInvocationInput +from ell.serialize.protocol import EllSerializer, EllAsyncSerializer + + +class SQLSerializer(EllSerializer): + def __init__(self, store: ell.stores.store.Store ): + self.store = store + self.supports_blobs = store.has_blob_storage + + def get_lmp(self, lmp_id: str): + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + model = SerializedLMP.from_api(lmp) + self.store.write_lmp(model, uses) + + def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + 
self.store.write_invocation(invocation, set(input.consumes)) + return None + + def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.store_blob(blob=blob, blob_id=blob_id) + + def retrieve_blob(self, blob_id: str) -> bytes: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.retrieve_blob(blob_id) + + def close(self): + pass + + +# todo(async): the underlying store and blob store is not async-aware +class AsyncSQLSerializer(EllAsyncSerializer): + def __init__(self, store: ell.stores.store.Store): + self.store = store + self.supports_blobs = store.has_blob_storage + + async def get_lmp(self, lmp_id: str) -> Optional[LMP]: + lmp = self.store.get_lmp(lmp_id) + if lmp: + return LMP(**lmp.model_dump()) + return None + + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + slmps = self.store.get_versions_by_fqn(fqn) + return [LMP(**slmp.model_dump()) for slmp in slmps] + + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + model = SerializedLMP.from_api(lmp) + self.store.write_lmp(model, uses) + + async def write_invocation(self, input: WriteInvocationInput) -> None: + invocation = Invocation.from_api(input.invocation) + self.store.write_invocation( + invocation, + set(input.consumes) + ) + return None + + async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: + if self.store.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.store_blob(blob=blob, blob_id=blob_id) + + async def retrieve_blob(self, blob_id: str) -> bytes: + if self.blob_store is None: + raise ValueError("Blob store is not enabled") + return self.store.blob_store.retrieve_blob(blob_id) + + async def close(self): + pass + + async def __aenter__(self): + return 
self + + async def __aexit__(self): + await self.close() From 651fbfdb4678570bc50508c19277ef10d1ccc7f3 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 10 Nov 2024 18:51:46 -0800 Subject: [PATCH 26/40] port invocation contents serialize fixes --- src/ell/types/serialize.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ell/types/serialize.py b/src/ell/types/serialize.py index 118db918e..aff3c0345 100644 --- a/src/ell/types/serialize.py +++ b/src/ell/types/serialize.py @@ -84,9 +84,11 @@ def total_size_bytes(self) -> int: self.global_vars, self.free_vars ] - return sum(len(json.dumps(field, default=( - lambda x: x.model_dump_json() if isinstance(x, BaseModel) else str(x))).encode('utf-8')) for field in - json_fields if field is not None) + return sum( + len(json.dumps(field, default=(lambda x: json.dumps(x.model_dump(), default=str, ensure_ascii=False) + if isinstance(x, BaseModel) else str(x)), ensure_ascii=False).encode('utf-8')) + for field in json_fields if field is not None + ) @cached_property def should_externalize(self) -> bool: From 1209cd8dd0d21ae024b218e0ed66eef9e50b4026 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 17 Nov 2024 06:03:00 -0800 Subject: [PATCH 27/40] json serialization over http --- examples/future/http_serializer.py | 25 +++ src/ell/configurator.py | 6 +- src/ell/lmp/_track.py | 18 +- src/ell/serialize/config.py | 4 +- src/ell/serialize/http.py | 277 ++++++++++++++++++++++------- src/ell/serialize/serializer.py | 4 +- src/ell/stores/studio.py | 9 +- src/ell/types/_lstr.py | 5 +- src/ell/types/message.py | 76 +++++--- src/ell/types/serialize.py | 41 +++-- src/ell/util/serialization.py | 14 +- tests/api/test_api.py | 118 +++++++++++- tests/test_message_type.py | 26 ++- 13 files changed, 501 insertions(+), 122 deletions(-) create mode 100644 examples/future/http_serializer.py diff --git a/examples/future/http_serializer.py b/examples/future/http_serializer.py new file mode 100644 index 
000000000..20844f3cf --- /dev/null +++ b/examples/future/http_serializer.py @@ -0,0 +1,25 @@ +from pydantic import Field +import ell + +ell.init(api_url='http://localhost:8081') + +@ell.tool() +def get_weather(location: str = Field(description="The full name of a city and country, e.g. San Francisco, CA, USA")): + """Get the current weather for a given location.""" + # Simulated weather API call + return f"The weather in {location} is sunny." + +@ell.complex(model="gpt-4o", tools=[get_weather]) +def travel_planner(destination: str): + """Plan a trip based on the destination and current weather.""" + return [ + ell.system("You are a travel planner. Use the weather tool to provide relevant advice."), + ell.user(f"Plan a trip to {destination}") + ] + +result = travel_planner("Paris") +print(result.text) # Prints travel advice +if result.tool_calls: + # This is done so that we can pass the tool calls to the language model + tool_results = result.call_tools_and_collect_as_message() + print("Weather info:", (tool_results.text)) diff --git a/src/ell/configurator.py b/src/ell/configurator.py index 90a2b4127..125f4e817 100644 --- a/src/ell/configurator.py +++ b/src/ell/configurator.py @@ -162,7 +162,7 @@ def init( default_api_params: Optional[Dict[str, Any]] = None, default_client: Optional[Any] = None, autocommit_model: str = "gpt-4o-mini", - api_server_url: Optional[str] = None, + api_url: Optional[str] = None, serializer: Optional[EllSerializer] = None, ) -> None: """ @@ -191,7 +191,7 @@ def init( config.verbose = verbose config.lazy_versioning = lazy_versioning - if not isinstance(store, str): + if store and not isinstance(store, str): try: from ell.serialize.sql import SQLSerializer config.serializer = SQLSerializer(store) @@ -206,7 +206,7 @@ def init( config.serializer = serializer else: serialize_config = SerializeConfig( - api_server_url=api_server_url, + api_url=api_url, storage_dir=store, # ...other options log_level=20 if verbose else 0, diff --git 
a/src/ell/lmp/_track.py b/src/ell/lmp/_track.py index 75b5b02e4..ef4dd04ff 100644 --- a/src/ell/lmp/_track.py +++ b/src/ell/lmp/_track.py @@ -1,6 +1,8 @@ import json import logging import threading + +from ell.types import Message, ContentBlock, ToolResult from ell.types.lmp import LMPType from ell.types.serialize import Invocation, InvocationContents, WriteInvocationInput, utc_now, WriteLMPInput from ell.util._warnings import _autocommit_warning @@ -201,11 +203,22 @@ def _serialize_lmp(func): def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion_tokens, state_cache_key, invocation_api_params, cleaned_invocation_params, consumes, result, parent_invocation_id): - + + # print(result) + # todo(alex). figure out what's going on here, looks like we're getting result as a tool result / single message sometimes + + results = None + if isinstance(result, list): + results = result + elif isinstance(result, ToolResult): + results = [Message(role='tool', content=[ContentBlock(tool_result=result)])] + else: + results = [result] + invocation_contents = InvocationContents( invocation_id=invocation_id, params=cleaned_invocation_params, - results=result, + results=results, invocation_api_params=invocation_api_params, global_vars=get_immutable_vars(func.__ell_closure__[2]), free_vars=get_immutable_vars(func.__ell_closure__[3]) @@ -217,6 +230,7 @@ def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion # Write to the blob store blob_id = config.serializer.store_blob( blob_id=invocation_id, + #todo(alex): normalize serialization blob=json.dumps(invocation_contents.model_dump( ), default=str, ensure_ascii=False).encode('utf-8'), ) diff --git a/src/ell/serialize/config.py b/src/ell/serialize/config.py index 8b0df4089..0d0a9c504 100644 --- a/src/ell/serialize/config.py +++ b/src/ell/serialize/config.py @@ -9,8 +9,8 @@ class SerializeConfig(BaseModel): storage_dir: Optional[str] = Field(default=None, description="Filesystem path used 
for SQLite and local blob storage") + api_url: Optional[str] = Field(default=None, description="ell API server endpoint") pg_connection_string: Optional[str] = None - api_server_endpoint: Optional[str] = Field(default=None, description="Ell API server endpoint") minio_endpoint: Optional[str] = None minio_access_key: Optional[str] = None minio_secret_key: Optional[str] = None @@ -29,5 +29,5 @@ def model_post_init(self, __context: Any): @computed_field def is_enabled(self) -> bool: - return bool(self.api_server_endpoint or self.pg_connection_string or self.storage_dir or self.minio_endpoint) + return bool(self.api_url or self.pg_connection_string or self.storage_dir or self.minio_endpoint) diff --git a/src/ell/serialize/http.py b/src/ell/serialize/http.py index 2b416be62..694951a20 100644 --- a/src/ell/serialize/http.py +++ b/src/ell/serialize/http.py @@ -8,64 +8,143 @@ from ell.types.serialize import GetLMPOutput, WriteLMPInput, LMP, WriteInvocationInput +# tood. make sure we don't lose any information or end up with malformed stuff relative to what +# the sto4res have been doing for serialization (this function) +# this should probably just be handled by the serialization types to centralize serialization code in one place +# def to_json(obj): +# """Serializes ell objects to json for writing to the database or wire protocols""" +# return json.dumps( +# pydantic_ltype_aware_cattr.unstructure(obj), +# sort_keys=True, default=repr, ensure_ascii=False) + + class EllHTTPSerializer(EllSerializer): def __init__(self, base_url: str): self.base_url = base_url self.client = httpx.Client(base_url=base_url) self.supports_blobs = True # we assume the server does, if not will find out later + self.logger = logging.getLogger( + __name__).getChild(self.__class__.__name__) + + def _handle_http_error( + self, + error: HTTPStatusError, + span: str, + message: Optional[str] = None, + extra: Optional[Dict[str, Any]] = None + ) -> None: + if error.response.status_code == 422: + 
error_detail = error.response.json().get( + "detail", "No detailed error message provided") + self.logger.error( + message or f"HTTP {error.response.status_code} Error in {span}", + extra={ + **(extra or {}), + "status_code": error.response.status_code, + "error_detail": error_detail, + "span": span, + "url": str(error.response.url), + "request_id": error.response.headers.get("x-request-id"), + } + ) + raise ValueError(f"Invalid input: {error_detail}") from error + raise def get_lmp(self, lmp_id: str) -> GetLMPOutput: - response = self.client.get(f"/lmp/{lmp_id}") - response.raise_for_status() - data = response.json() - if data is None: - return None - return LMP(**data) + try: + response = self.client.get(f"/lmp/{lmp_id}") + response.raise_for_status() + data = response.json() + return None if data is None else LMP(**data) + except HTTPStatusError as e: + self._handle_http_error(e, "get_lmp") + raise def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: try: response = self.client.post("/lmp", json={ - "lmp": lmp.model_dump(mode="json"), + # todo. restructure so model_dump_json + # todo. because pydantic doesn't have a sane default for this we should consider a single place to specify exclude_none, exclude_unset like we had with unstructure for basemodel... 
+ "lmp": lmp.model_dump(mode='json', exclude_none=True, exclude_unset=True), "uses": uses }) response.raise_for_status() except HTTPStatusError as e: - if e.response.status_code == 422: - error_detail = e.response.json().get("detail", "No detailed error message provided") - logging.error(f"Unprocessable Entity (422) Error: {error_detail}") - raise ValueError(f"Invalid input: {error_detail}") from e + self._handle_http_error( + error=e, + span="write_lmp", + message="Failed to write LMP", + extra={'lmp_id': lmp.lmp_id, 'lmp_version': lmp.version_number} + ) raise def write_invocation(self, input: WriteInvocationInput) -> None: - response = self.client.post( - "/invocation", - json=input.model_dump(mode="json") - ) - response.raise_for_status() - return None + try: + response = self.client.post( + url="/invocation", + headers={"Content-Type": "application/json"}, + content=input.model_dump_json(exclude_none=True, exclude_unset=True), + ) + response.raise_for_status() + return None + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="write_invocation", + message="Failed to write invocation", + extra={'invocation_id': input.invocation.id} + ) + raise def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - response = self.client.post("/blob", data={ - "blob_id": blob_id, - "blob": blob, - "metadata": metadata - }) - response.raise_for_status() - return response.json()["blob_id"] + try: + response = self.client.post("/blob", data={ + "blob_id": blob_id, + "blob": blob, + "metadata": metadata + }) + response.raise_for_status() + return response.json()["blob_id"] + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="store_blob", + message="Failed to store blob", + extra={'blob_id': blob_id} + ) + raise def retrieve_blob(self, blob_id: str) -> bytes: - response = self.client.get(f"/blob/{blob_id}") - response.raise_for_status() - return response.content + try: + response = 
self.client.get(f"/blob/{blob_id}") + response.raise_for_status() + return response.content + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="retrieve_blob", + message="Failed to retrieve blob", + extra={'blob_id': blob_id} + ) + raise def close(self): self.client.close() def get_lmp_versions(self, fqn: str) -> List[LMP]: - response = self.client.get("/lmp/versions", params={"fqn": fqn}) - response.raise_for_status() - data = response.json() - return [LMP(**lmp_data) for lmp_data in data] + try: + response = self.client.get("/lmp/versions", params={"fqn": fqn}) + response.raise_for_status() + data = response.json() + return [LMP(**lmp_data) for lmp_data in data] + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="get_lmp_versions", + message="Failed to get LMP versions", + extra={'fqn': fqn} + ) + raise class EllAsyncHTTPSerializer(EllAsyncSerializer): @@ -73,50 +152,115 @@ def __init__(self, base_url: str): self.base_url = base_url self.client = httpx.AsyncClient(base_url=base_url) self.supports_blobs = True # we assume the server does, if not will find out later + self.logger = logging.getLogger( + __name__).getChild(self.__class__.__name__) + + def _handle_http_error( + self, + error: HTTPStatusError, + span: str, + message: Optional[str] = None, + extra: Optional[Dict[str, Any]] = None + ) -> None: + if error.response.status_code == 422: + error_detail = error.response.json().get( + "detail", "No detailed error message provided") + self.logger.error( + message or f"HTTP {error.response.status_code} Error in {span}", + extra={ + **(extra or {}), + "status_code": error.response.status_code, + "error_detail": error_detail, + "span": span, + "url": str(error.response.url), + "request_id": error.response.headers.get("x-request-id"), + } + ) + raise ValueError(f"Invalid input: {error_detail}") from error + raise async def get_lmp(self, lmp_id: str) -> GetLMPOutput: - response = await 
self.client.get(f"/lmp/{lmp_id}") - response.raise_for_status() - data = response.json() - if data is None: - return None - return LMP(**data) + try: + response = await self.client.get(f"/lmp/{lmp_id}") + response.raise_for_status() + data = response.json() + if data is None: + return None + return LMP(**data) + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="get_lmp", + message="Failed to get LMP", + extra={'lmp_id': lmp_id} + ) + raise async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: try: response = await self.client.post("/lmp", json={ - "lmp": lmp.model_dump(mode="json"), + "lmp": lmp.model_dump(mode="json", exclude_none=True, exclude_unset=True), "uses": uses }) response.raise_for_status() except HTTPStatusError as e: - if e.response.status_code == 422: - error_detail = e.response.json().get("detail", "No detailed error message provided") - logging.error(f"Unprocessable Entity (422) Error: {error_detail}") - raise ValueError(f"Invalid input: {error_detail}") from e + self._handle_http_error( + error=e, + span="write_lmp", + message="Failed to write LMP", + extra={'lmp_id': lmp.lmp_id, 'lmp_version': lmp.version_number} + ) raise async def write_invocation(self, input: WriteInvocationInput) -> None: - response = await self.client.post( - "/invocation", - json=input.model_dump(mode="json") - ) - response.raise_for_status() - return None + try: + response = await self.client.post( + "/invocation", + headers={"Content-Type": "application/json"}, + content=input.model_dump_json(exclude_none=True, exclude_unset=True), + ) + response.raise_for_status() + return None + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="write_invocation", + message="Failed to write invocation", + extra={'invocation_id': input.invocation.id} + ) + raise async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - response = await self.client.post("/blob", data={ - 
"blob_id": blob_id, - "blob": blob, - "metadata": metadata - }) - response.raise_for_status() - return response.json()["blob_id"] + try: + response = await self.client.post("/blob", data={ + "blob_id": blob_id, + "blob": blob, + "metadata": metadata + }) + response.raise_for_status() + return response.json()["blob_id"] + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="store_blob", + message="Failed to store blob", + extra={'blob_id': blob_id} + ) + raise async def retrieve_blob(self, blob_id: str) -> bytes: - response = await self.client.get(f"/blob/{blob_id}") - response.raise_for_status() - return response.content + try: + response = await self.client.get(f"/blob/{blob_id}") + response.raise_for_status() + return response.content + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="retrieve_blob", + message="Failed to retrieve blob", + extra={'blob_id': blob_id} + ) + raise async def close(self): await self.client.aclose() @@ -128,7 +272,16 @@ async def __aexit__(self): await self.close() async def get_lmp_versions(self, fqn: str) -> List[LMP]: - response = await self.client.get("/lmp/versions", params={"fqn": fqn}) - response.raise_for_status() - data = response.json() - return [LMP(**lmp_data) for lmp_data in data] + try: + response = await self.client.get("/lmp/versions", params={"fqn": fqn}) + response.raise_for_status() + data = response.json() + return [LMP(**lmp_data) for lmp_data in data] + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="get_lmp_versions", + message="Failed to get LMP versions", + extra={'fqn': fqn} + ) + raise diff --git a/src/ell/serialize/serializer.py b/src/ell/serialize/serializer.py index e8fa563df..6efdf718d 100644 --- a/src/ell/serialize/serializer.py +++ b/src/ell/serialize/serializer.py @@ -37,10 +37,10 @@ def get_serializer(config: SerializeConfig) -> EllSerializer: return SQLiteSerializer(config.storage_dir, blob_store) except ImportError: raise 
missing_ell_extras(message="SQLite storage is not enabled.", extras=["sqlite"]) - if config.api_server_endpoint: + if config.api_url: try: from ell.serialize.http import EllHTTPSerializer - return EllHTTPSerializer(config.api_server_endpoint) + return EllHTTPSerializer(config.api_url) except ImportError: raise missing_ell_extras(message="HTTP serialization is not enabled.", extras=["sqlite"]) diff --git a/src/ell/stores/studio.py b/src/ell/stores/studio.py index bae888b3a..cb768903e 100644 --- a/src/ell/stores/studio.py +++ b/src/ell/stores/studio.py @@ -197,7 +197,12 @@ class Invocation(InvocationBase, table=True): @classmethod def from_api(cls, input: ell.types.serialize.Invocation): + fields = { + field: getattr(input, field) + for field in input.model_fields + if field != "contents" + } return cls( - **input.model_dump(exclude={"contents"}), + **fields, contents=InvocationContents.from_api(input.contents) - ) + ) \ No newline at end of file diff --git a/src/ell/types/_lstr.py b/src/ell/types/_lstr.py index 55f5327a4..47c6fab4c 100644 --- a/src/ell/types/_lstr.py +++ b/src/ell/types/_lstr.py @@ -99,6 +99,7 @@ def __new__( instance = super(_lstr, cls).__new__(cls, content) # instance._logits = logits if isinstance(origin_trace, str): + # TODO. 
pydantic validation splits on ',', it would be good to have this in one place or standardize on a list for the serialized format unless ',' denotes something else instance.__origin_trace__ = frozenset({origin_trace}) else: instance.__origin_trace__ = ( @@ -116,8 +117,8 @@ def __get_pydantic_core_schema__( def validate_lstr(value): if isinstance(value, dict) and value.get("__lstr", False): content = value["content"] - origin_trace = value["__origin_trace__"].split(",") - return cls(content, origin_trace=origin_trace) + origin_trace = value["__origin_trace__"].split(",") if isinstance(value["__origin_trace__"], str) else frozenset(value["__origin_trace__"]) + return cls(content, origin_trace=origin_trace) # type: ignore elif isinstance(value, str): return cls(value) elif isinstance(value, cls): diff --git a/src/ell/types/message.py b/src/ell/types/message.py index 61acb4ad6..24e026438 100644 --- a/src/ell/types/message.py +++ b/src/ell/types/message.py @@ -6,14 +6,16 @@ import base64 from io import BytesIO from PIL import Image as PILImage +from types import FunctionType -from pydantic import BaseModel, ConfigDict, Field, model_validator, field_serializer +from pydantic import BaseModel, ConfigDict, Field, model_validator, field_serializer, field_validator from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union, Type + +from ell.util.serialization import serialize_image, unstructure_lstr -from ell.util.serialization import serialize_image _lstr_generic = Union[_lstr, str] InvocableTool = Callable[..., Union["ToolResult", _lstr_generic, List["ContentBlock"], ]] @@ -22,8 +24,8 @@ class ToolResult(BaseModel): - tool_call_id: _lstr_generic - result: List["ContentBlock"] + tool_call_id: _lstr_generic = Field(description="Id of the tool call from the model that led the tool to be called (`'call_{id}'`)") + result: List["ContentBlock"] = 
Field(description="Tool call output as a list of ell ContentBlocks") @property def text(self) -> str: @@ -41,13 +43,39 @@ def __repr__(self): return f"{self.__class__.__name__}(tool_call_id={self.tool_call_id}, result={_content_to_text(self.result)})" class ToolCall(BaseModel): - tool : InvocableTool - tool_call_id : Optional[_lstr_generic] = Field(default=None) - params : BaseModel + tool: Union[InvocableTool, str] = Field(description="The tool function to call or a reference to it when serialized") + tool_call_id: Optional[_lstr_generic] = Field(default=None) + # todo. If we include BaseModel in this union instead of Any, then pydantic + # constructs `BaseModel()` when we call super().__init__ with a dictionary + params: Union[Any, Dict[str, Any]] + + # TODO. This should reference a tool fqn + version if possible + # ell should have an InvocableTool with __ properties that have this info at serialization time + @field_serializer('tool') + def serialize_tool(self, tool: InvocableTool, _info): + return tool.__name__ if hasattr(tool, '__name__') else str(tool) + + # @field_serializer('params') + # def serialize_params(self, params: BaseModel, _info): + # # Explicitly serialize the params BaseModel + # return params.model_dump(exclude_none=True) + + @field_serializer('tool_call_id') + def serialize_tool_call_id(self, tool_call_id: _lstr_generic): + if tool_call_id is None: + return None + origin_trace = tool_call_id.__dict__['__origin_trace__'] + if origin_trace: + return unstructure_lstr(tool_call_id) + return tool_call_id def __init__(self, tool, params : Union[BaseModel, Dict[str, Any]], tool_call_id=None): - if not isinstance(params, BaseModel): + if isinstance(tool, FunctionType) and hasattr(tool, '__ell_params_model__'): params = tool.__ell_params_model__(**params) #convenience. 
+ if isinstance(tool_call_id, dict): + tool_call_id = _lstr(content=tool_call_id['content'], + origin_trace=tool_call_id.get('__origin_trace__'), + logits=tool_call_id.get('logits')) super().__init__(tool=tool, tool_call_id=tool_call_id, params=params) def __call__(self, **kwargs): @@ -61,7 +89,10 @@ def call_and_collect_as_message_block(self): raise DeprecationWarning("call_and_collect_as_message_block is deprecated. Use collect_as_content_block instead.") def call_and_collect_as_content_block(self): - res = self.tool(**self.params.model_dump(), _tool_call_id=self.tool_call_id) + if isinstance(self.tool, str): + raise ValueError("Cannot call a tool that is a string reference.") + res = self.tool(**(self.params.model_dump() if isinstance(self.params, BaseModel) else self.params), + _tool_call_id=self.tool_call_id) return ContentBlock(tool_result=res) def call_and_collect_as_message(self): @@ -135,7 +166,7 @@ class ContentBlock(BaseModel): # This breaks us maintaing parity with the openai python client in some sen but so does image. def __init__(self, *args, **kwargs): - if "image" in kwargs and not isinstance(kwargs["image"], ImageContent): + if "image" in kwargs and kwargs['image'] is not None and not isinstance(kwargs["image"], ImageContent): # todo(alex). are we looking for dict here? im = kwargs["image"] = ImageContent.coerce(kwargs["image"]) # XXX: Backwards compatibility, Deprecate. 
if (d := kwargs.get("image_detail", None)): im.detail = d @@ -255,8 +286,10 @@ def coerce(cls, content: AnyContent) -> "ContentBlock": return cls(image=ImageContent.coerce(content)) if isinstance(content, BaseModel): return cls(parsed=content) + if isinstance(content, dict): + return cls(**content) - raise ValueError(f"Invalid content type: {type(content)}") + raise ValueError(f"Invalid ContentBlock content type: {type(content)}") @field_serializer('parsed') def serialize_parsed(self, value: Optional[BaseModel], _info): @@ -303,7 +336,7 @@ def to_content_blocks( if not isinstance(content, list): content = [content] - + return [ContentBlock.model_validate(ContentBlock.coerce(c)) for c in content] @@ -437,22 +470,7 @@ def serialize_content(self, content: List[ContentBlock]): for block in content ] - @classmethod - def model_validate(cls, obj: Any) -> 'Message': - """Custom validation to handle deserialization""" - if isinstance(obj, dict): - if 'content' in obj and isinstance(obj['content'], list): - content_blocks = [] - for block in obj['content']: - if isinstance(block, dict): - if 'text' in block: - block['text'] = str(block['text']) if block['text'] is not None else None - content_blocks.append(ContentBlock.model_validate(block)) - else: - content_blocks.append(ContentBlock.coerce(block)) - obj['content'] = content_blocks - return super().model_validate(obj) - + #todo(alex): needed? 
@classmethod def model_validate_json(cls, json_str: str) -> 'Message': """Custom validation to handle deserialization from JSON string""" diff --git a/src/ell/types/serialize.py b/src/ell/types/serialize.py index aff3c0345..a363df686 100644 --- a/src/ell/types/serialize.py +++ b/src/ell/types/serialize.py @@ -3,7 +3,7 @@ from functools import cached_property from typing import Any, Dict, List, Optional, Union -from pydantic import BaseModel, AwareDatetime, Field +from pydantic import BaseModel, AwareDatetime, Field, field_serializer, field_validator from ell.types.lmp import LMPType from ell.types.message import Message @@ -57,19 +57,18 @@ class LMP(BaseModel): class GetLMPInput(BaseModel): id: str -GetLMPOutput = Optional[LMP] -InvocationResults = Union[List[Message], Any] +GetLMPOutput = Optional[LMP] class InvocationContents(BaseModel): - invocation_id: str = Field(default_factory=lambda: str(uuid.uuid4())) - params: Optional[Dict[str, Any]] = None - results: Optional[InvocationResults] = None - invocation_api_params: Optional[Dict[str, Any]] = None - global_vars: Optional[Dict[str, Any]] = None - free_vars: Optional[Dict[str, Any]] = None - is_external: bool = Field(default=False) + invocation_id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="ID of the invocation the contents belong to") + params: Optional[Dict[str, Any]] = Field(description="The parameters of the LMP at the time of the invocation", default=None) + results: Optional[List[Message]] = Field(description="The output of the invocation as a list of ell Messages", default=None) + invocation_api_params: Optional[Dict[str, Any]] = Field(description="Arguments the model API was called with", default=None) + global_vars: Optional[Dict[str, Any]] = Field(description="Global variable bindings and their values at the time of the invocation", default=None) + free_vars: Optional[Dict[str, Any]] = Field(description="Free variable bindings and their values at the time of the invocation", 
default=None) + is_external: bool = Field(default=False, description="Whether the invocation contents are stored externally in a blob store. If they are they can be retrieved by 'invocation-{invocation_id}'.") @cached_property def total_size_bytes(self) -> int: @@ -84,6 +83,7 @@ def total_size_bytes(self) -> int: self.global_vars, self.free_vars ] + # todo(alex): we may want to bring this in line with other json serialization return sum( len(json.dumps(field, default=(lambda x: json.dumps(x.model_dump(), default=str, ensure_ascii=False) if isinstance(x, BaseModel) else str(x)), ensure_ascii=False).encode('utf-8')) @@ -101,7 +101,7 @@ class Invocation(BaseModel): """ id: Optional[str] = None lmp_id: str - latency_ms: int + latency_ms: float prompt_tokens: Optional[int] = None completion_tokens: Optional[int] = None state_cache_key: Optional[str] = None @@ -109,6 +109,24 @@ class Invocation(BaseModel): used_by_id: Optional[str] = None contents: InvocationContents + # Note: we must set to always right now, because the global json serializer calls model_dump instead of + # model_dump_json and then json.dumps with default of repr. 
would prefer when_used=json but + # tbh it's probably not needed as i think pydantic already handles this for json + + @field_serializer('created_at', when_used='always') + def serialize_date(self, created_at: AwareDatetime): + return str(created_at) + + @field_validator('created_at', mode="before") + def deserialize_and_validate_date(cls, created_at: Union[str, AwareDatetime]): + if isinstance(created_at, str): + dt = datetime.fromisoformat(created_at) + if dt.tzinfo is None: + raise ValueError( + "Datetime string must include timezone information") + return dt + return created_at + class WriteInvocationInput(BaseModel): """ @@ -133,7 +151,6 @@ class WriteBlobInput(BaseModel): metadata: Optional[Dict[str, Any]] = None - # class Blob(BaseModel): # blob_id: str # blob: bytes diff --git a/src/ell/util/serialization.py b/src/ell/util/serialization.py index 855004caa..3d1d76d50 100644 --- a/src/ell/util/serialization.py +++ b/src/ell/util/serialization.py @@ -53,7 +53,19 @@ def serialize_image(img): ) def unstructure_lstr(obj): - return dict(content=str(obj), **obj.__dict__, __lstr=True) + if isinstance(obj, str): + return dict(content=obj, __lstr=True) + origin_trace = obj.__dict__.__origin_trace__ + if origin_trace and isinstance(origin_trace, frozenset): + return dict(content=str(obj), + **obj.__dict__, + origin_trace=list(sorted(origin_trace)), + __lstr=True) + + return dict(content=str(obj), + **obj.__dict__, + __lstr=True) + pydantic_ltype_aware_cattr.register_unstructure_hook( _lstr, diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 4ac869aa3..421d6aeb0 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -4,12 +4,16 @@ import pytest from typing import Any, Dict from fastapi.testclient import TestClient +from pydantic import BaseModel, Field, ValidationError +import ell +from ell import Message from ell.serialize.sqlite import SQLiteSerializer, AsyncSQLiteSerializer from ell.api.server import create_app, get_pubsub, get_serializer 
from ell.api.config import Config from ell.api.logger import setup_logging -from ell.types.serialize import utc_now +from ell.types import ToolCall +from ell.types.serialize import utc_now, Invocation, InvocationContents from ell.stores.studio import SerializedLMP from ell.types.lmp import LMPType from ell.types.serialize import WriteLMPInput @@ -19,10 +23,12 @@ def sqlite_serializer() -> SQLiteSerializer: return SQLiteSerializer(":memory:") + @pytest.fixture def async_sqlite_serializer() -> AsyncSQLiteSerializer: return AsyncSQLiteSerializer(":memory:") + def test_construct_serialized_lmp(): serialized_lmp = SerializedLMP( lmp_id="test_lmp_id", @@ -100,7 +106,6 @@ def create_test_app(serializer: AsyncSQLiteSerializer): async def get_publisher_override(): yield publisher - def get_serializer_override(): return serializer @@ -151,6 +156,7 @@ def test_write_lmp(async_sqlite_serializer: AsyncSQLiteSerializer): def test_write_invocation(async_sqlite_serializer: AsyncSQLiteSerializer): _app, client, *_ = create_test_app(async_sqlite_serializer) + # first write an lmp.. lmp_id = uuid4().hex lmp_data: Dict[str, Any] = { "lmp_id": lmp_id, @@ -178,8 +184,12 @@ def test_write_invocation(async_sqlite_serializer: AsyncSQLiteSerializer): "global_vars": {"global_var1": "value1"}, "free_vars": {"free_var1": "value2"}, "latency_ms": 100.0, - "invocation_kwargs": {"model": "gpt-4o", "messages": [{"role": "system", "content": "You are a JSON parser. You respond only in JSON. 
Do not format using markdown."}, {"role": "user", "content": "You are given the following task: \"What is two plus two?\"\n Parse the task into the following type:\n {'$defs': {'Add': {'properties': {'op': {'const': '+', 'enum': ['+'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Add', 'type': 'object'}, 'Div': {'properties': {'op': {'const': '/', 'enum': ['/'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Div', 'type': 'object'}, 'Mul': {'properties': {'op': {'const': '*', 'enum': ['*'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Mul', 'type': 'object'}, 'Sub': {'properties': {'op': {'const': '-', 'enum': ['-'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Sub', 'type': 'object'}}, 'anyOf': [{'$ref': '#/$defs/Add'}, {'$ref': '#/$defs/Sub'}, {'$ref': '#/$defs/Mul'}, {'$ref': '#/$defs/Div'}]}\n "}], "lm_kwargs": {"temperature": 0.1}, "client": None}, - "contents": { } + "invocation_kwargs": {"model": "gpt-4o", "messages": [{"role": "system", + "content": "You are a JSON parser. You respond only in JSON. 
Do not format using markdown."}, + {"role": "user", + "content": "You are given the following task: \"What is two plus two?\"\n Parse the task into the following type:\n {'$defs': {'Add': {'properties': {'op': {'const': '+', 'enum': ['+'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Add', 'type': 'object'}, 'Div': {'properties': {'op': {'const': '/', 'enum': ['/'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Div', 'type': 'object'}, 'Mul': {'properties': {'op': {'const': '*', 'enum': ['*'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Mul', 'type': 'object'}, 'Sub': {'properties': {'op': {'const': '-', 'enum': ['-'], 'title': 'Op', 'type': 'string'}, 'a': {'title': 'A', 'type': 'number'}, 'b': {'title': 'B', 'type': 'number'}}, 'required': ['op', 'a', 'b'], 'title': 'Sub', 'type': 'object'}}, 'anyOf': [{'$ref': '#/$defs/Add'}, {'$ref': '#/$defs/Sub'}, {'$ref': '#/$defs/Mul'}, {'$ref': '#/$defs/Div'}]}\n "}], + "lm_kwargs": {"temperature": 0.1}, "client": None}, + "contents": {} } consumes_data = [] @@ -197,5 +207,105 @@ def test_write_invocation(async_sqlite_serializer: AsyncSQLiteSerializer): # assert response.json() == input +class MySampleToolInput(BaseModel): + sample_property: str = Field("A thing") + + +@ell.tool() +def my_sample_tool(args: MySampleToolInput = Field( + description="The full name of a city and country, e.g. 
San Francisco, CA, USA")): + return '42' + + +def test_invocation_json_round_trip(): + invocation_id = "invocation-" + uuid4().hex + tool_call = ToolCall( + tool=my_sample_tool, + tool_call_id=uuid4().hex, + params=MySampleToolInput(sample_property="test"), + ) + invocation_contents = InvocationContents( + invocation_id=invocation_id, + results=[Message(role='user', content=[tool_call])] + ) + invocation = Invocation( + id=invocation_id, + lmp_id=uuid4().hex, + latency_ms=42.0, + contents=invocation_contents, + created_at=utc_now() + ) + + # Serialize + result = invocation.model_dump() + + # Deserialize + _invocation=None + try: + _invocation = Invocation.model_validate(result) + except ValidationError as e: + import json + print("\nJSON errors:") + print(json.dumps(e.errors(), default=str,indent=2)) + + # Should be equal + # Except that: + # ToolCall before / after serialization: + # 1. `tool` is a function vs a string + # 2. `params` is a BaseModel (in userland) vs a dictionary + # These are not equivalent + + # What should be equivalent: deserialized forms of serialized forms + assert _invocation.model_dump() == result + +def test_write_invocation_tool_call(async_sqlite_serializer: AsyncSQLiteSerializer): + _app, client, *_ = create_test_app(async_sqlite_serializer) + + # first write an lmp.. 
+ lmp_id = uuid4().hex + lmp_data: Dict[str, Any] = { + "lmp_id": lmp_id, + "name": "Test LMP", + "source": "def test_function(): pass", + "dependencies": str(["dep1", "dep2"]), + "lmp_type": LMPType.LM, + "api_params": {"param1": "value1"}, + } + response = client.post( + "/lmp", + json={'lmp': lmp_data, 'uses': []} + ) + try: + assert response.status_code == 200 + except Exception as e: + print(response.json()) + raise e + + invocation_id = "invocation-" + uuid4().hex + tool_call = ToolCall( + tool=my_sample_tool, + tool_call_id=uuid4().hex, + params=MySampleToolInput(sample_property="test"), + ) + invocation_contents = InvocationContents( + invocation_id=invocation_id, + results=[Message(role='user', content=[tool_call])] + ) + invocation = Invocation( + id=invocation_id, + lmp_id=lmp_id, + latency_ms=42.0, + contents=invocation_contents, + created_at=utc_now() + ) + + response = client.post( + "/invocation", + json={'invocation':invocation.model_dump(),'consumes':[]} + ) + print(response.json()) + assert response.status_code == 200 + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_message_type.py b/tests/test_message_type.py index 8fd476ad4..aaef96029 100644 --- a/tests/test_message_type.py +++ b/tests/test_message_type.py @@ -1,3 +1,5 @@ +from uuid import uuid4 + import pytest from pydantic import BaseModel import ell @@ -143,4 +145,26 @@ def test_message_json_serialization(): assert loaded_message.role == original_message.role assert len(loaded_message.content) == len(original_message.content) - assert str(loaded_message.content[0].text) == str(original_message.content[0].text) \ No newline at end of file + assert str(loaded_message.content[0].text) == str(original_message.content[0].text) + +def test_tool_call_json_serialization(): + class MySampleToolInput(BaseModel): + sample_property: str + + @ell.tool() + def my_sample_tool(args: MySampleToolInput): + return '42' + + original_message = Message(role='assistant', content=[ + ToolCall( + 
tool=my_sample_tool, + tool_call_id=uuid4().hex, + params=MySampleToolInput(sample_property="test"), + )]) + + message_json = original_message.model_dump_json() + loaded_message = Message.model_validate_json(message_json) + + assert loaded_message.role == original_message.role + assert len(loaded_message.content) == len(original_message.content) + assert str(loaded_message.content[0].text) == str(original_message.content[0].text) From ff3327bb2b52613538721c262c1fa4fc42ed9d67 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 17 Nov 2024 11:30:14 -0800 Subject: [PATCH 28/40] fix typo --- src/ell/lmp/tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ell/lmp/tool.py b/src/ell/lmp/tool.py index fd6c074f9..4abc32685 100644 --- a/src/ell/lmp/tool.py +++ b/src/ell/lmp/tool.py @@ -97,7 +97,7 @@ def wrapper( # Determine the type annotation if param.annotation == inspect.Parameter.empty: - raise ValueError(f"Parameter {param_name} has no type annotation, and cannot be converted into a tool schema for OpenAI and other provisders. Should OpenAI produce a string or an integer, etc, for this parameter?") + raise ValueError(f"Parameter {param_name} has no type annotation, and cannot be converted into a tool schema for OpenAI and other providers. Should OpenAI produce a string or an integer, etc, for this parameter?") annotation = param.annotation # Determine the default value From 3dc43446a6ddec0b6c7bbd1cb53542cf3e36bdf3 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 17 Nov 2024 11:37:38 -0800 Subject: [PATCH 29/40] tool call params dict or basemodel --- src/ell/types/message.py | 49 ++++++++++++++++++-------------------- tests/test_message_type.py | 5 ++-- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/ell/types/message.py b/src/ell/types/message.py index 24e026438..14e398413 100644 --- a/src/ell/types/message.py +++ b/src/ell/types/message.py @@ -1,19 +1,17 @@ # todo: implement tracing for structured outs. 
this a v2 feature. +import base64 import json -from ell.types._lstr import _lstr +from concurrent.futures import ThreadPoolExecutor, as_completed from functools import cached_property -import numpy as np -import base64 from io import BytesIO -from PIL import Image as PILImage from types import FunctionType +from typing import Any, Callable, Dict, List, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, model_validator, field_serializer, field_validator - -from concurrent.futures import ThreadPoolExecutor, as_completed - -from typing import Any, Callable, Dict, List, Optional, Union, Type +import numpy as np +from PIL import Image as PILImage +from pydantic import BaseModel, ConfigDict, Field, model_validator, field_serializer, model_serializer +from ell.types._lstr import _lstr from ell.util.serialization import serialize_image, unstructure_lstr _lstr_generic = Union[_lstr, str] @@ -45,9 +43,15 @@ def __repr__(self): class ToolCall(BaseModel): tool: Union[InvocableTool, str] = Field(description="The tool function to call or a reference to it when serialized") tool_call_id: Optional[_lstr_generic] = Field(default=None) - # todo. If we include BaseModel in this union instead of Any, then pydantic - # constructs `BaseModel()` when we call super().__init__ with a dictionary - params: Union[Any, Dict[str, Any]] + params: Union[Dict[str, Any], BaseModel] + + def __init__(self, tool, params: Optional[Union[BaseModel, Dict[str, Any]]], tool_call_id: Optional[_lstr_generic]=None): + if (not isinstance(params, BaseModel)) and isinstance(tool, FunctionType) and hasattr(tool, '__ell_params_model__'): + params = tool.__ell_params_model__(**params) + if isinstance(tool_call_id, dict): + tool_call_id = _lstr(content=tool_call_id['content'], origin_trace=tool_call_id.get('__origin_trace__'), logits=tool_call_id.get('logits')) + + super().__init__(tool=tool, tool_call_id=tool_call_id, params=params) # TODO. 
This should reference a tool fqn + version if possible # ell should have an InvocableTool with __ properties that have this info at serialization time @@ -55,10 +59,11 @@ class ToolCall(BaseModel): def serialize_tool(self, tool: InvocableTool, _info): return tool.__name__ if hasattr(tool, '__name__') else str(tool) - # @field_serializer('params') - # def serialize_params(self, params: BaseModel, _info): - # # Explicitly serialize the params BaseModel - # return params.model_dump(exclude_none=True) + @field_serializer('params') + def serialize_params(self, params: Union[Dict[str,Any],BaseModel], _info): + if isinstance(params, dict): + return params + return params.model_dump(exclude_none=True, exclude_unset=True) @field_serializer('tool_call_id') def serialize_tool_call_id(self, tool_call_id: _lstr_generic): @@ -69,17 +74,9 @@ def serialize_tool_call_id(self, tool_call_id: _lstr_generic): return unstructure_lstr(tool_call_id) return tool_call_id - def __init__(self, tool, params : Union[BaseModel, Dict[str, Any]], tool_call_id=None): - if isinstance(tool, FunctionType) and hasattr(tool, '__ell_params_model__'): - params = tool.__ell_params_model__(**params) #convenience. - if isinstance(tool_call_id, dict): - tool_call_id = _lstr(content=tool_call_id['content'], - origin_trace=tool_call_id.get('__origin_trace__'), - logits=tool_call_id.get('logits')) - super().__init__(tool=tool, tool_call_id=tool_call_id, params=params) - def __call__(self, **kwargs): assert not kwargs, "Unexpected arguments provided. Calling a tool uses the params provided in the ToolCall." + assert not isinstance(self.tool, str), "ToolCall.tool is a string. Tools are not invocable once serialized." # XXX: TODO: MOVE TRACKING CODE TO _TRACK AND OUT OF HERE AND API. 
return self.tool(**self.params.model_dump()) @@ -99,7 +96,7 @@ def call_and_collect_as_message(self): return Message(role="user", content=[self.call_and_collect_as_message_block()]) def __repr__(self): - return f"{self.__class__.__name__}({self.tool.__name__}({self.params}), tool_call_id='{self.tool_call_id}')" + return f"{self.__class__.__name__}({self.tool.__name__ if hasattr(self.tool, '__name__') else str(self.tool)}({self.params}), tool_call_id='{self.tool_call_id}')" class ImageContent(BaseModel): diff --git a/tests/test_message_type.py b/tests/test_message_type.py index aaef96029..d6feac0bd 100644 --- a/tests/test_message_type.py +++ b/tests/test_message_type.py @@ -158,12 +158,13 @@ def my_sample_tool(args: MySampleToolInput): original_message = Message(role='assistant', content=[ ToolCall( tool=my_sample_tool, - tool_call_id=uuid4().hex, - params=MySampleToolInput(sample_property="test"), + tool_call_id=f'call_{uuid4().hex}', + params={'args': MySampleToolInput(sample_property="test")}, )]) message_json = original_message.model_dump_json() loaded_message = Message.model_validate_json(message_json) + assert loaded_message.tool_calls[0].params == {'args': {'sample_property': 'test'}} assert loaded_message.role == original_message.role assert len(loaded_message.content) == len(original_message.content) From 26d1eeb84332da1d512e334f22f3580db2d05a6f Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Sun, 17 Nov 2024 11:38:51 -0800 Subject: [PATCH 30/40] properly serialize tool call params in providers --- src/ell/providers/anthropic.py | 5 ++++- src/ell/providers/bedrock.py | 5 ++++- src/ell/providers/openai.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ell/providers/anthropic.py b/src/ell/providers/anthropic.py index 2002f0ea8..4a3d8ba29 100644 --- a/src/ell/providers/anthropic.py +++ b/src/ell/providers/anthropic.py @@ -1,4 +1,7 @@ from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, Union, cast + +from 
pydantic import BaseModel + from ell.provider import EllCallParams, Metadata, Provider from ell.types import Message, ContentBlock, ToolCall, ImageContent @@ -194,7 +197,7 @@ def _content_block_to_anthropic_format(content_block: ContentBlock): type="tool_use", id=tool_call.tool_call_id, name=tool_call.tool.__name__, - input=tool_call.params.model_dump() + input=tool_call.params.model_dump() if isinstance(tool_call.params, BaseModel) else tool_call.params, ) elif (tool_result := content_block.tool_result): return dict( diff --git a/src/ell/providers/bedrock.py b/src/ell/providers/bedrock.py index e99ff75a7..8c69948be 100644 --- a/src/ell/providers/bedrock.py +++ b/src/ell/providers/bedrock.py @@ -1,6 +1,9 @@ from abc import ABC, abstractmethod from collections import defaultdict from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, cast + +from pydantic import BaseModel + from ell.provider import EllCallParams, Metadata, Provider from ell.types import Message, ContentBlock, ToolCall, ImageContent from ell.types._lstr import _lstr @@ -199,7 +202,7 @@ def content_block_to_bedrock_format(content_block: ContentBlock) -> Dict[str, An "toolUse": { "toolUseId": content_block.tool_call.tool_call_id, "name": content_block.tool_call.tool.__name__, - "input": content_block.tool_call.params.model_dump() + "input": content_block.tool_call.params.model_dump() if isinstance(content_block.tool_call.params, BaseModel) else content_block.tool_call.params, } } elif content_block.tool_result: diff --git a/src/ell/providers/openai.py b/src/ell/providers/openai.py index 8fe7f4d1b..b3cca688d 100644 --- a/src/ell/providers/openai.py +++ b/src/ell/providers/openai.py @@ -64,7 +64,7 @@ def translate_to_provider(self, ell_call : EllCallParams) -> Dict[str, Any]: type="function", function=dict( name=tool_call.tool.__name__, - arguments=json.dumps(tool_call.params.model_dump(), ensure_ascii=False) + arguments=tool_call.params.model_dump_json() if 
isinstance(tool_call.params,BaseModel) else json.dumps(tool_call.params, ensure_ascii=False) ) ) for tool_call in tool_calls ], role="assistant", From 1276eb6ddd7fb8b7d38918dabf1e06ffd5414d0f Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Mon, 18 Nov 2024 07:47:38 -0800 Subject: [PATCH 31/40] simplify params serialization, add tool reference --- src/ell/providers/anthropic.py | 2 +- src/ell/providers/bedrock.py | 2 +- src/ell/providers/openai.py | 2 +- src/ell/types/message.py | 38 ++++++++++++++++++++++------------ tests/api/test_api.py | 4 ++++ 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/ell/providers/anthropic.py b/src/ell/providers/anthropic.py index 4a3d8ba29..ba379b09b 100644 --- a/src/ell/providers/anthropic.py +++ b/src/ell/providers/anthropic.py @@ -197,7 +197,7 @@ def _content_block_to_anthropic_format(content_block: ContentBlock): type="tool_use", id=tool_call.tool_call_id, name=tool_call.tool.__name__, - input=tool_call.params.model_dump() if isinstance(tool_call.params, BaseModel) else tool_call.params, + input=tool_call.serialize_params(), ) elif (tool_result := content_block.tool_result): return dict( diff --git a/src/ell/providers/bedrock.py b/src/ell/providers/bedrock.py index 8c69948be..fc0a0cd6f 100644 --- a/src/ell/providers/bedrock.py +++ b/src/ell/providers/bedrock.py @@ -202,7 +202,7 @@ def content_block_to_bedrock_format(content_block: ContentBlock) -> Dict[str, An "toolUse": { "toolUseId": content_block.tool_call.tool_call_id, "name": content_block.tool_call.tool.__name__, - "input": content_block.tool_call.params.model_dump() if isinstance(content_block.tool_call.params, BaseModel) else content_block.tool_call.params, + "input": content_block.tool_call.serialize_params(), } } elif content_block.tool_result: diff --git a/src/ell/providers/openai.py b/src/ell/providers/openai.py index b3cca688d..ef85ff416 100644 --- a/src/ell/providers/openai.py +++ b/src/ell/providers/openai.py @@ -64,7 +64,7 @@ def 
translate_to_provider(self, ell_call : EllCallParams) -> Dict[str, Any]: type="function", function=dict( name=tool_call.tool.__name__, - arguments=tool_call.params.model_dump_json() if isinstance(tool_call.params,BaseModel) else json.dumps(tool_call.params, ensure_ascii=False) + arguments=json.dumps(tool_call.serialize_params(), ensure_ascii=False) ) ) for tool_call in tool_calls ], role="assistant", diff --git a/src/ell/types/message.py b/src/ell/types/message.py index 14e398413..852b4d6f4 100644 --- a/src/ell/types/message.py +++ b/src/ell/types/message.py @@ -40,10 +40,15 @@ def text_only(self) -> str: def __repr__(self): return f"{self.__class__.__name__}(tool_call_id={self.tool_call_id}, result={_content_to_text(self.result)})" +class ToolReference(BaseModel): + """A reference to an invocable tool""" + fqn: str = Field(description="The fully qualified name of the tool") + hash: str = Field(description="The hash of the tool and its dependencies") + class ToolCall(BaseModel): - tool: Union[InvocableTool, str] = Field(description="The tool function to call or a reference to it when serialized") + tool: Union[InvocableTool, ToolReference] = Field(description="The tool function to call or a reference to it when serialized") tool_call_id: Optional[_lstr_generic] = Field(default=None) - params: Union[Dict[str, Any], BaseModel] + params: Union[Dict[str, Any], BaseModel] = Field(description="Arguments for the tool call provided by the model.") def __init__(self, tool, params: Optional[Union[BaseModel, Dict[str, Any]]], tool_call_id: Optional[_lstr_generic]=None): if (not isinstance(params, BaseModel)) and isinstance(tool, FunctionType) and hasattr(tool, '__ell_params_model__'): @@ -53,18 +58,25 @@ def __init__(self, tool, params: Optional[Union[BaseModel, Dict[str, Any]]], too super().__init__(tool=tool, tool_call_id=tool_call_id, params=params) - # TODO. 
This should reference a tool fqn + version if possible - # ell should have an InvocableTool with __ properties that have this info at serialization time @field_serializer('tool') - def serialize_tool(self, tool: InvocableTool, _info): - return tool.__name__ if hasattr(tool, '__name__') else str(tool) + def serialize_tool(self, tool: Union[InvocableTool, ToolReference], _info): + if isinstance(tool, ToolReference): + return tool + return ToolReference( + # todo(alex). add the value of fqn we want to standardize on to all lmps so we don't keep using qualname + fqn=tool.__qualname__, + hash=getattr(tool, '__ell_hash__', 'unknown') + ) @field_serializer('params') - def serialize_params(self, params: Union[Dict[str,Any],BaseModel], _info): + def _serialize_params(self, params: Union[Dict[str, Any], BaseModel]) -> Dict[str, Any]: if isinstance(params, dict): return params return params.model_dump(exclude_none=True, exclude_unset=True) + def serialize_params(self) -> Dict[str, Any]: + return self._serialize_params(self.params) + @field_serializer('tool_call_id') def serialize_tool_call_id(self, tool_call_id: _lstr_generic): if tool_call_id is None: @@ -76,19 +88,19 @@ def serialize_tool_call_id(self, tool_call_id: _lstr_generic): def __call__(self, **kwargs): assert not kwargs, "Unexpected arguments provided. Calling a tool uses the params provided in the ToolCall." - assert not isinstance(self.tool, str), "ToolCall.tool is a string. Tools are not invocable once serialized." + assert not isinstance(self.tool, ToolReference), f"Tools are not invocable once serialized. ToolCall.tool is a ToolReference: {self.tool}" # XXX: TODO: MOVE TRACKING CODE TO _TRACK AND OUT OF HERE AND API. - return self.tool(**self.params.model_dump()) + return self.tool(**self.serialize_params()) # XXX: Deprecate in 0.1.0 def call_and_collect_as_message_block(self): raise DeprecationWarning("call_and_collect_as_message_block is deprecated. 
Use collect_as_content_block instead.") def call_and_collect_as_content_block(self): - if isinstance(self.tool, str): - raise ValueError("Cannot call a tool that is a string reference.") - res = self.tool(**(self.params.model_dump() if isinstance(self.params, BaseModel) else self.params), + if isinstance(self.tool, ToolReference): + raise ValueError(f"Cannot call a tool that is a ToolReference: {self.tool}") + res = self.tool(**self.serialize_params(), _tool_call_id=self.tool_call_id) return ContentBlock(tool_result=res) @@ -203,7 +215,7 @@ def type(self): @property def content(self): - return getattr(self, self.type) + return getattr(self, self.type) # type: ignore @classmethod def coerce(cls, content: AnyContent) -> "ContentBlock": diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 421d6aeb0..d478c176a 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -218,6 +218,8 @@ def my_sample_tool(args: MySampleToolInput = Field( def test_invocation_json_round_trip(): + # pretend it's being tracked + my_sample_tool.__ell_hash__ = "lmp-123" invocation_id = "invocation-" + uuid4().hex tool_call = ToolCall( tool=my_sample_tool, @@ -281,6 +283,8 @@ def test_write_invocation_tool_call(async_sqlite_serializer: AsyncSQLiteSerializ print(response.json()) raise e + # pretend it's being tracked + my_sample_tool.__ell_hash__ = "lmp-123" invocation_id = "invocation-" + uuid4().hex tool_call = ToolCall( tool=my_sample_tool, From 7c935dbe66f4c90b1bc117433a90f0061791da37 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Mon, 18 Nov 2024 17:38:20 -0800 Subject: [PATCH 32/40] content block parsed serde --- src/ell/types/message.py | 13 +++++++++++-- tests/test_message_type.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/ell/types/message.py b/src/ell/types/message.py index 852b4d6f4..e11566e09 100644 --- a/src/ell/types/message.py +++ b/src/ell/types/message.py @@ -9,7 +9,7 @@ import numpy as np from PIL import Image as 
PILImage -from pydantic import BaseModel, ConfigDict, Field, model_validator, field_serializer, model_serializer +from pydantic import BaseModel, ConfigDict, Field, model_validator, field_serializer, field_validator from ell.types._lstr import _lstr from ell.util.serialization import serialize_image, unstructure_lstr @@ -169,7 +169,7 @@ class ContentBlock(BaseModel): image: Optional[ImageContent] = Field(default=None) audio: Optional[Union[np.ndarray, List[float]]] = Field(default=None) tool_call: Optional[ToolCall] = Field(default=None) - parsed: Optional[BaseModel] = Field(default=None) + parsed: Optional[Union[Dict[str, Any], BaseModel]] = Field(default=None) tool_result: Optional[ToolResult] = Field(default=None) # TODO: Add a JSON type? This would be nice for response_format. This is different than resposne_format = model. Or we could be opinionated and automatically parse the json response. That might be nice. # This breaks us maintaing parity with the openai python client in some sen but so does image. @@ -305,6 +305,15 @@ def serialize_parsed(self, value: Optional[BaseModel], _info): if value is None: return None return value.model_dump(exclude_none=True, exclude_unset=True) + + @field_validator('parsed' ,mode='wrap') + def deserialize_parsed(cls, value: Optional[Union[Dict[str, Any],BaseModel]], _info): + # Why must we do this? 
+ # pydantic returns an empty BaseModel() whenever parsed is a dict + if value is None or isinstance(value, (dict,BaseModel)): + return value + raise ValueError(f"Invalid ContentBlock.parsed value: {type(value)}") + def to_content_blocks( diff --git a/tests/test_message_type.py b/tests/test_message_type.py index d6feac0bd..4e31d122f 100644 --- a/tests/test_message_type.py +++ b/tests/test_message_type.py @@ -169,3 +169,15 @@ def my_sample_tool(args: MySampleToolInput): assert loaded_message.role == original_message.role assert len(loaded_message.content) == len(original_message.content) assert str(loaded_message.content[0].text) == str(original_message.content[0].text) + +def test_parsed_json_serialization(): + class DummyFormattedResponse(BaseModel): + field1: str + field2: int + + original_message = Message(role='assistant', content=[ContentBlock(parsed=DummyFormattedResponse(field1="test", field2=42))]) + message_json = original_message.model_dump_json() + loaded_message = Message.model_validate_json(message_json) + assert loaded_message.content[0].parsed == {'field1': 'test', 'field2': 42} + + \ No newline at end of file From e57249201df42bc16c3146b3f2aee2054590dd2c Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Tue, 19 Nov 2024 06:02:31 -0800 Subject: [PATCH 33/40] refactor(serialize): remove redundant code in sql store wrappers --- src/ell/serialize/postgres.py | 91 +++-------------------------------- src/ell/serialize/sql.py | 8 +-- src/ell/serialize/sqlite.py | 82 +++---------------------------- 3 files changed, 17 insertions(+), 164 deletions(-) diff --git a/src/ell/serialize/postgres.py b/src/ell/serialize/postgres.py index 9fe03634f..82a0da83a 100644 --- a/src/ell/serialize/postgres.py +++ b/src/ell/serialize/postgres.py @@ -1,95 +1,16 @@ -from typing import List, Optional, Dict, Any +from typing import Optional +from ell.serialize.sql import SQLSerializer, AsyncSQLSerializer from ell.stores.sql import PostgresStore from ell.stores.store import 
BlobStore, AsyncBlobStore -from ell.stores.studio import Invocation, SerializedLMP -from ell.types.serialize import LMP, WriteLMPInput, WriteInvocationInput -from ell.serialize.protocol import EllSerializer, EllAsyncSerializer -class PostgresSerializer(EllSerializer): +class PostgresSerializer(SQLSerializer): def __init__(self, db_uri: str, blob_store: Optional[BlobStore] = None): - self.store = PostgresStore(db_uri, blob_store) - self.supports_blobs = blob_store is not None - - def get_lmp(self, lmp_id: str): - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - - def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - model = SerializedLMP.from_api(lmp) - self.store.write_lmp(model, uses) - - def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) - self.store.write_invocation(invocation, set(input.consumes)) - return None - - def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.store_blob(blob=blob, blob_id=blob_id) - - def retrieve_blob(self, blob_id: str) -> bytes: - if self.store.blob_store is None: - raise ValueError("Blob store is not enabled") - return self.store.blob_store.retrieve_blob(blob_id) - - def close(self): - pass + super().__init__(PostgresStore(db_uri, blob_store)) # todo(async): the underlying store is not async-aware -class AsyncPostgresSerializer(EllAsyncSerializer): +class AsyncPostgresSerializer(AsyncSQLSerializer): def __init__(self, db_uri: str, blob_store: Optional[AsyncBlobStore] = None): - self.store = PostgresStore(db_uri, blob_store) - self.blob_store = blob_store - self.supports_blobs = blob_store is not None 
- - async def get_lmp(self, lmp_id: str) -> Optional[LMP]: - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - model = SerializedLMP.from_api(lmp) - self.store.write_lmp(model, uses) - - async def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) - self.store.write_invocation( - invocation, - set(input.consumes) - ) - return None - - async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - if self.blob_store is None: - raise ValueError("Blob store is not enabled") - return await self.blob_store.store_blob(blob=blob, blob_id=blob_id) - - async def retrieve_blob(self, blob_id: str) -> bytes: - if self.blob_store is None: - raise ValueError("Blob store is not enabled") - return await self.blob_store.retrieve_blob(blob_id) - - async def close(self): - # todo. Do we have a close method? 
- pass - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() + super().__init__(PostgresStore(db_uri, blob_store)) diff --git a/src/ell/serialize/sql.py b/src/ell/serialize/sql.py index 4ab0fb560..f035b3e4c 100644 --- a/src/ell/serialize/sql.py +++ b/src/ell/serialize/sql.py @@ -1,13 +1,13 @@ from typing import List, Optional, Dict, Any -import ell.stores.store +from ell.stores.store import Store from ell.stores.studio import Invocation, SerializedLMP from ell.types.serialize import LMP, WriteLMPInput, WriteInvocationInput from ell.serialize.protocol import EllSerializer, EllAsyncSerializer class SQLSerializer(EllSerializer): - def __init__(self, store: ell.stores.store.Store ): + def __init__(self, store: Store): self.store = store self.supports_blobs = store.has_blob_storage @@ -46,7 +46,7 @@ def close(self): # todo(async): the underlying store and blob store is not async-aware class AsyncSQLSerializer(EllAsyncSerializer): - def __init__(self, store: ell.stores.store.Store): + def __init__(self, store: Store): self.store = store self.supports_blobs = store.has_blob_storage @@ -78,7 +78,7 @@ async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[st return self.store.blob_store.store_blob(blob=blob, blob_id=blob_id) async def retrieve_blob(self, blob_id: str) -> bytes: - if self.blob_store is None: + if self.store.blob_store is None: raise ValueError("Blob store is not enabled") return self.store.blob_store.retrieve_blob(blob_id) diff --git a/src/ell/serialize/sqlite.py b/src/ell/serialize/sqlite.py index b005899db..49ea618fb 100644 --- a/src/ell/serialize/sqlite.py +++ b/src/ell/serialize/sqlite.py @@ -1,86 +1,18 @@ -from typing import List, Optional, Dict, Any +from typing import Optional -from ell.serialize.protocol import EllSerializer, EllAsyncSerializer +from ell.serialize.sql import SQLSerializer, AsyncSQLSerializer from ell.stores.sql import SQLiteStore from ell.stores.store import 
AsyncBlobStore, BlobStore -from ell.stores.studio import SerializedLMP, Invocation -from ell.types.serialize import WriteLMPInput, WriteInvocationInput, LMP -class SQLiteSerializer(EllSerializer): +class SQLiteSerializer(SQLSerializer): def __init__(self, storage_dir: str, blob_store: Optional[BlobStore] = None): - self.store = SQLiteStore(storage_dir, blob_store) - self.supports_blobs = True + super().__init__(SQLiteStore(storage_dir, blob_store)) - def get_lmp(self, lmp_id: str): - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - serialized_lmp = SerializedLMP.from_api(lmp) - self.store.write_lmp(serialized_lmp, uses) - - def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) - self.store.write_invocation(invocation, set(input.consumes)) - return None - - def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - return self.store.blob_store.store_blob(blob, blob_id) # type: ignore - - def retrieve_blob(self, blob_id: str) -> bytes: - return self.store.blob_store.retrieve_blob(blob_id) # type: ignore - - def close(self): - pass - - - -# todo(async). underlying store is not async-aware -class AsyncSQLiteSerializer(EllAsyncSerializer): +# todo(async). 
underlying store is not async +class AsyncSQLiteSerializer(AsyncSQLSerializer): def __init__(self, storage_dir: str, blob_store: Optional[AsyncBlobStore] = None): - self.store = SQLiteStore(storage_dir, blob_store) - self.blob_store = blob_store - self.supports_blobs = True - - async def get_lmp(self, lmp_id: str): - lmp = self.store.get_lmp(lmp_id) - if lmp: - return LMP(**lmp.model_dump()) - return None - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - slmps = self.store.get_versions_by_fqn(fqn) - return [LMP(**slmp.model_dump()) for slmp in slmps] - - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - serialized_lmp = SerializedLMP.from_api(lmp) - self.store.write_lmp(serialized_lmp, uses) - - async def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) - self.store.write_invocation(invocation, set(input.consumes)) - return None - - async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: - return await self.blob_store.store_blob(blob, blob_id) # type: ignore - - async def retrieve_blob(self, blob_id: str) -> bytes: - return await self.blob_store.retrieve_blob(blob_id) # type: ignore - - async def close(self): - pass - - async def __aenter__(self): - return self - - async def __aexit__(self): - await self.close() + super().__init__(SQLiteStore(storage_dir, blob_store)) From ff609fdfad78bb52f001128d626ebe0728bf072d Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Tue, 19 Nov 2024 06:21:14 -0800 Subject: [PATCH 34/40] fix: created_at not optional if defaulted --- src/ell/types/serialize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ell/types/serialize.py b/src/ell/types/serialize.py index a363df686..2c4b4eae3 100644 --- a/src/ell/types/serialize.py +++ b/src/ell/types/serialize.py @@ -29,6 +29,7 @@ class WriteLMPInput(BaseModel): api_params: Optional[Dict[str, Any]] = None initial_free_vars: 
Optional[Dict[str, Any]] = None initial_global_vars: Optional[Dict[str, Any]] = None + created_at: AwareDatetime = Field(default_factory=utc_now) # TODO. dict or list? # uses: List[str] = Field(default_factory=list) @@ -36,7 +37,6 @@ class WriteLMPInput(BaseModel): # num_invocations: Optional[int] = None commit_message: Optional[str] = None version_number: Optional[int] = None - created_at: Optional[AwareDatetime] = Field(default_factory=utc_now) class LMP(BaseModel): From 441b69d7687da5c3f7c3699730dc7860c51dded9 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Tue, 19 Nov 2024 06:25:57 -0800 Subject: [PATCH 35/40] naming: use 'coerce' for conversions --- src/ell/serialize/sql.py | 8 ++++---- src/ell/stores/studio.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ell/serialize/sql.py b/src/ell/serialize/sql.py index f035b3e4c..d4cb26f44 100644 --- a/src/ell/serialize/sql.py +++ b/src/ell/serialize/sql.py @@ -22,11 +22,11 @@ def get_lmp_versions(self, fqn: str) -> List[LMP]: return [LMP(**slmp.model_dump()) for slmp in slmps] def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - model = SerializedLMP.from_api(lmp) + model = SerializedLMP.coerce(lmp) self.store.write_lmp(model, uses) def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) + invocation = Invocation.coerce(input.invocation) self.store.write_invocation(invocation, set(input.consumes)) return None @@ -61,11 +61,11 @@ async def get_lmp_versions(self, fqn: str) -> List[LMP]: return [LMP(**slmp.model_dump()) for slmp in slmps] async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: - model = SerializedLMP.from_api(lmp) + model = SerializedLMP.coerce(lmp) self.store.write_lmp(model, uses) async def write_invocation(self, input: WriteInvocationInput) -> None: - invocation = Invocation.from_api(input.invocation) + invocation = Invocation.coerce(input.invocation) 
self.store.write_invocation( invocation, set(input.consumes) diff --git a/src/ell/stores/studio.py b/src/ell/stores/studio.py index 9f46254ed..8e599950e 100644 --- a/src/ell/stores/studio.py +++ b/src/ell/stores/studio.py @@ -91,9 +91,9 @@ class SerializedLMP(SerializedLMPBase, table=True): ), ) - @staticmethod - def from_api(input: ell.types.serialize.WriteLMPInput): - return SerializedLMP( + @classmethod + def coerce(cls, input: ell.types.serialize.WriteLMPInput): + return cls( lmp_id=input.lmp_id, lmp_type=input.lmp_type, name=input.name, @@ -167,7 +167,7 @@ class InvocationContents(InvocationContentsBase, table=True): invocation: "Invocation" = Relationship(back_populates="contents") @classmethod - def from_api(cls, input: ell.types.serialize.InvocationContents): + def coerce(cls, input: ell.types.serialize.InvocationContents): return cls(**input.model_dump()) class Invocation(InvocationBase, table=True): @@ -199,7 +199,7 @@ class Invocation(InvocationBase, table=True): ) @classmethod - def from_api(cls, input: ell.types.serialize.Invocation): + def coerce(cls, input: ell.types.serialize.Invocation): fields = { field: getattr(input, field) for field in input.model_fields @@ -207,5 +207,5 @@ def from_api(cls, input: ell.types.serialize.Invocation): } return cls( **fields, - contents=InvocationContents.from_api(input.contents) + contents=InvocationContents.coerce(input.contents) ) \ No newline at end of file From 3db907ce034e9d95cc29a52ffb63b9e084edd174 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Tue, 19 Nov 2024 07:40:32 -0800 Subject: [PATCH 36/40] refactor: dedupe http error handling --- src/ell/serialize/http.py | 97 +++++++++++++++------------------------ 1 file changed, 36 insertions(+), 61 deletions(-) diff --git a/src/ell/serialize/http.py b/src/ell/serialize/http.py index 694951a20..9ff598d7f 100644 --- a/src/ell/serialize/http.py +++ b/src/ell/serialize/http.py @@ -17,17 +17,8 @@ # pydantic_ltype_aware_cattr.unstructure(obj), # sort_keys=True, 
default=repr, ensure_ascii=False) - -class EllHTTPSerializer(EllSerializer): - def __init__(self, base_url: str): - self.base_url = base_url - self.client = httpx.Client(base_url=base_url) - self.supports_blobs = True # we assume the server does, if not will find out later - self.logger = logging.getLogger( - __name__).getChild(self.__class__.__name__) - - def _handle_http_error( - self, +def make_handle_http_error(logger: logging.Logger): + def handle_http_error( error: HTTPStatusError, span: str, message: Optional[str] = None, @@ -36,7 +27,7 @@ def _handle_http_error( if error.response.status_code == 422: error_detail = error.response.json().get( "detail", "No detailed error message provided") - self.logger.error( + logger.error( message or f"HTTP {error.response.status_code} Error in {span}", extra={ **(extra or {}), @@ -50,6 +41,17 @@ def _handle_http_error( raise ValueError(f"Invalid input: {error_detail}") from error raise + return handle_http_error + + +class EllHTTPSerializer(EllSerializer): + def __init__(self, base_url: str): + self.base_url = base_url + self.client = httpx.Client(base_url=base_url) + self.supports_blobs = True # we assume the server does, if not will find out later + self.logger = logging.getLogger(__name__).getChild(self.__class__.__name__) + self._handle_http_error = make_handle_http_error(self.logger) + def get_lmp(self, lmp_id: str) -> GetLMPOutput: try: response = self.client.get(f"/lmp/{lmp_id}") @@ -57,7 +59,7 @@ def get_lmp(self, lmp_id: str) -> GetLMPOutput: data = response.json() return None if data is None else LMP(**data) except HTTPStatusError as e: - self._handle_http_error(e, "get_lmp") + self._handle_http_error(error=e, span="get_lmp", message="Failed to get LMP", extra={lmp_id: lmp_id}) raise def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: @@ -71,9 +73,9 @@ def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: response.raise_for_status() except HTTPStatusError as e: self._handle_http_error( 
- error=e, - span="write_lmp", message="Failed to write LMP", + span="write_lmp", + error=e, extra={'lmp_id': lmp.lmp_id, 'lmp_version': lmp.version_number} ) raise @@ -154,30 +156,7 @@ def __init__(self, base_url: str): self.supports_blobs = True # we assume the server does, if not will find out later self.logger = logging.getLogger( __name__).getChild(self.__class__.__name__) - - def _handle_http_error( - self, - error: HTTPStatusError, - span: str, - message: Optional[str] = None, - extra: Optional[Dict[str, Any]] = None - ) -> None: - if error.response.status_code == 422: - error_detail = error.response.json().get( - "detail", "No detailed error message provided") - self.logger.error( - message or f"HTTP {error.response.status_code} Error in {span}", - extra={ - **(extra or {}), - "status_code": error.response.status_code, - "error_detail": error_detail, - "span": span, - "url": str(error.response.url), - "request_id": error.response.headers.get("x-request-id"), - } - ) - raise ValueError(f"Invalid input: {error_detail}") from error - raise + self._handle_http_error = make_handle_http_error(self.logger) async def get_lmp(self, lmp_id: str) -> GetLMPOutput: try: @@ -196,6 +175,21 @@ async def get_lmp(self, lmp_id: str) -> GetLMPOutput: ) raise + async def get_lmp_versions(self, fqn: str) -> List[LMP]: + try: + response = await self.client.get("/lmp/versions", params={"fqn": fqn}) + response.raise_for_status() + data = response.json() + return [LMP(**lmp_data) for lmp_data in data] + except HTTPStatusError as e: + self._handle_http_error( + error=e, + span="get_lmp_versions", + message="Failed to get LMP versions", + extra={'fqn': fqn} + ) + raise + async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: try: response = await self.client.post("/lmp", json={ @@ -217,17 +211,13 @@ async def write_invocation(self, input: WriteInvocationInput) -> None: response = await self.client.post( "/invocation", headers={"Content-Type": "application/json"}, - 
content=input.model_dump_json(exclude_none=True, exclude_unset=True), + content=input.model_dump_json(exclude_none=True, exclude_unset=True) ) response.raise_for_status() return None except HTTPStatusError as e: - self._handle_http_error( - error=e, - span="write_invocation", - message="Failed to write invocation", - extra={'invocation_id': input.invocation.id} - ) + self._handle_http_error(message="Failed to write invocation", span="write_invocation", error=e, + extra={'invocation_id': input.invocation.id}) raise async def store_blob(self, blob_id: str, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str: @@ -270,18 +260,3 @@ async def __aenter__(self): async def __aexit__(self): await self.close() - - async def get_lmp_versions(self, fqn: str) -> List[LMP]: - try: - response = await self.client.get("/lmp/versions", params={"fqn": fqn}) - response.raise_for_status() - data = response.json() - return [LMP(**lmp_data) for lmp_data in data] - except HTTPStatusError as e: - self._handle_http_error( - error=e, - span="get_lmp_versions", - message="Failed to get LMP versions", - extra={'fqn': fqn} - ) - raise From b1a2445479df03a194cc8a423e29fb2e3200f9d3 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Wed, 20 Nov 2024 05:32:48 -0800 Subject: [PATCH 37/40] refactor(serialize): put uses in WriteLMPInput instead of second argument --- src/ell/api/server.py | 16 +++------------- src/ell/lmp/_track.py | 3 ++- src/ell/serialize/http.py | 16 +++++++--------- src/ell/serialize/protocol.py | 4 ++-- src/ell/serialize/sql.py | 8 ++++---- src/ell/types/serialize.py | 7 ++----- 6 files changed, 20 insertions(+), 34 deletions(-) diff --git a/src/ell/api/server.py b/src/ell/api/server.py index 1efbd63c9..c5fb54f9e 100644 --- a/src/ell/api/server.py +++ b/src/ell/api/server.py @@ -1,7 +1,6 @@ # todo. 
under ell.api.server.___main___ import asyncio from contextlib import asynccontextmanager, AsyncExitStack -import json import logging from typing import List, Optional @@ -108,11 +107,7 @@ async def get_lmp_versions( return await serializer.get_lmp_versions(fqn) @app.get("/lmp/{lmp_id}", response_model=GetLMPOutput) - async def get_lmp(lmp_id: str, - serializer: EllAsyncSerializer = Depends(get_serializer), - # todo. figure out the ramifications of doing this here - # session: Session = Depends(get_session) - ): + async def get_lmp(lmp_id: str, serializer: EllAsyncSerializer = Depends(get_serializer)): lmp = await serializer.get_lmp(lmp_id=lmp_id) if lmp is None: raise HTTPException(status_code=404, detail="LMP not found") @@ -121,22 +116,17 @@ async def get_lmp(lmp_id: str, @app.post("/lmp") async def write_lmp( lmp: WriteLMPInput, - # fixme. what is this type supposed to be? - uses: List[str], # SerializedLMPUses, pubsub: PubSub = Depends(get_pubsub), serializer: EllAsyncSerializer = Depends(get_serializer) ): - await serializer.write_lmp(lmp, uses) + await serializer.write_lmp(lmp) if pubsub: loop = asyncio.get_event_loop() loop.create_task( pubsub.publish( f"lmp/{lmp.lmp_id}/created", - json.dumps({ - "lmp": lmp.model_dump(), - "uses": uses - }, default=str) + lmp.model_dump_json(exclude_none=True, exclude_unset=True), ) ) diff --git a/src/ell/lmp/_track.py b/src/ell/lmp/_track.py index ef4dd04ff..0e64b2832 100644 --- a/src/ell/lmp/_track.py +++ b/src/ell/lmp/_track.py @@ -197,8 +197,9 @@ def _serialize_lmp(func): lmp_type=lmp_type, api_params=api_params if api_params else None, version_number=version, + uses=[f.__ell_hash__ for f in func.__ell_uses__], ) - config.serializer.write_lmp(serialized_lmp, [f.__ell_hash__ for f in func.__ell_uses__]) + config.serializer.write_lmp(serialized_lmp) func._has_serialized_lmp = True def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion_tokens, diff --git a/src/ell/serialize/http.py 
b/src/ell/serialize/http.py index 9ff598d7f..4744f826a 100644 --- a/src/ell/serialize/http.py +++ b/src/ell/serialize/http.py @@ -45,9 +45,10 @@ def handle_http_error( class EllHTTPSerializer(EllSerializer): - def __init__(self, base_url: str): + def __init__(self, base_url: Optional[str] = None, client: Optional[httpx.Client] = None): + assert base_url is not None or client is not None, "Either base_url or client must be provided" self.base_url = base_url - self.client = httpx.Client(base_url=base_url) + self.client = client or httpx.Client(base_url=base_url) # type: ignore self.supports_blobs = True # we assume the server does, if not will find out later self.logger = logging.getLogger(__name__).getChild(self.__class__.__name__) self._handle_http_error = make_handle_http_error(self.logger) @@ -62,14 +63,11 @@ def get_lmp(self, lmp_id: str) -> GetLMPOutput: self._handle_http_error(error=e, span="get_lmp", message="Failed to get LMP", extra={lmp_id: lmp_id}) raise - def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + def write_lmp(self, lmp: WriteLMPInput) -> None: try: - response = self.client.post("/lmp", json={ - # todo. restructure so model_dump_json - # todo. because pydantic doesn't have a sane default for this we should consider a single place to specify exclude_none, exclude_unset like we had with unstructure for basemodel... - "lmp": lmp.model_dump(mode='json', exclude_none=True, exclude_unset=True), - "uses": uses - }) + response = self.client.post("/lmp", + headers={"Content-Type": "application/json"}, + content=lmp.model_dump_json(exclude_none=True, exclude_unset=True)) response.raise_for_status() except HTTPStatusError as e: self._handle_http_error( diff --git a/src/ell/serialize/protocol.py b/src/ell/serialize/protocol.py index 396befc6a..2da1a3a44 100644 --- a/src/ell/serialize/protocol.py +++ b/src/ell/serialize/protocol.py @@ -10,7 +10,7 @@ class EllSerializer(Protocol): def get_lmp(self, lmp_id: str) -> GetLMPOutput: ... 
- def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + def write_lmp(self, lmp: WriteLMPInput) -> None: ... def write_invocation(self, input: WriteInvocationInput) -> None: @@ -36,7 +36,7 @@ class EllAsyncSerializer(Protocol): async def get_lmp(self, lmp_id: str) -> GetLMPOutput: ... - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + async def write_lmp(self, lmp: WriteLMPInput) -> None: ... async def write_invocation(self, input: WriteInvocationInput) -> None: diff --git a/src/ell/serialize/sql.py b/src/ell/serialize/sql.py index d4cb26f44..f3224d054 100644 --- a/src/ell/serialize/sql.py +++ b/src/ell/serialize/sql.py @@ -21,9 +21,9 @@ def get_lmp_versions(self, fqn: str) -> List[LMP]: slmps = self.store.get_versions_by_fqn(fqn) return [LMP(**slmp.model_dump()) for slmp in slmps] - def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + def write_lmp(self, lmp: WriteLMPInput) -> None: model = SerializedLMP.coerce(lmp) - self.store.write_lmp(model, uses) + self.store.write_lmp(model, lmp.uses) def write_invocation(self, input: WriteInvocationInput) -> None: invocation = Invocation.coerce(input.invocation) @@ -60,9 +60,9 @@ async def get_lmp_versions(self, fqn: str) -> List[LMP]: slmps = self.store.get_versions_by_fqn(fqn) return [LMP(**slmp.model_dump()) for slmp in slmps] - async def write_lmp(self, lmp: WriteLMPInput, uses: List[str]) -> None: + async def write_lmp(self, lmp: WriteLMPInput) -> None: model = SerializedLMP.coerce(lmp) - self.store.write_lmp(model, uses) + self.store.write_lmp(model, lmp.uses) async def write_invocation(self, input: WriteInvocationInput) -> None: invocation = Invocation.coerce(input.invocation) diff --git a/src/ell/types/serialize.py b/src/ell/types/serialize.py index 2c4b4eae3..913f366a6 100644 --- a/src/ell/types/serialize.py +++ b/src/ell/types/serialize.py @@ -30,15 +30,12 @@ class WriteLMPInput(BaseModel): initial_free_vars: Optional[Dict[str, Any]] = None initial_global_vars: 
Optional[Dict[str, Any]] = None created_at: AwareDatetime = Field(default_factory=utc_now) - # TODO. dict or list? - # uses: List[str] = Field(default_factory=list) - - # this is omitted so as to not confuse whether the number should be incremented (should always happen at the db level) - # num_invocations: Optional[int] = None + uses: List[str] = Field(default_factory=list) commit_message: Optional[str] = None version_number: Optional[int] = None +# todo. see if we can get rid of this...the only difference with writelmpinput is some properties are read only class LMP(BaseModel): lmp_id: str name: str From 1c3af0ffc96b6c8595d6b5d01fa6cab1b948001b Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Wed, 20 Nov 2024 05:33:04 -0800 Subject: [PATCH 38/40] add http client tests --- tests/api/test_api.py | 94 +++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 21 deletions(-) diff --git a/tests/api/test_api.py b/tests/api/test_api.py index d478c176a..a4d3e8716 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -2,18 +2,21 @@ from logging import DEBUG from uuid import uuid4 import pytest -from typing import Any, Dict +from typing import Any, Dict, Tuple + +from fastapi import FastAPI from fastapi.testclient import TestClient from pydantic import BaseModel, Field, ValidationError import ell from ell import Message +from ell.serialize.http import EllHTTPSerializer from ell.serialize.sqlite import SQLiteSerializer, AsyncSQLiteSerializer from ell.api.server import create_app, get_pubsub, get_serializer from ell.api.config import Config from ell.api.logger import setup_logging from ell.types import ToolCall -from ell.types.serialize import utc_now, Invocation, InvocationContents +from ell.types.serialize import WriteInvocationInput, utc_now, Invocation, InvocationContents from ell.stores.studio import SerializedLMP from ell.types.lmp import LMPType from ell.types.serialize import WriteLMPInput @@ -96,7 +99,7 @@ def test_write_lmp_input(): 
assert input2.created_at.tzinfo == timezone.utc -def create_test_app(serializer: AsyncSQLiteSerializer): +def create_test_app(serializer: AsyncSQLiteSerializer) -> Tuple[FastAPI, EllHTTPSerializer, None, Config]: setup_logging(DEBUG) config = Config(storage_dir=":memory:") app = create_app(config) @@ -112,7 +115,7 @@ def get_serializer_override(): app.dependency_overrides[get_pubsub] = get_publisher_override app.dependency_overrides[get_serializer] = get_serializer_override - client = TestClient(app) + client = EllHTTPSerializer(client=TestClient(app)) return app, client, publisher, config @@ -120,7 +123,6 @@ def get_serializer_override(): def test_write_lmp(async_sqlite_serializer: AsyncSQLiteSerializer): _app, client, *_ = create_test_app(async_sqlite_serializer) - # fime. figure out what's going on with `uses` lmp_data: Dict[str, Any] = { "lmp_id": uuid4().hex, "name": "Test LMP", @@ -133,28 +135,28 @@ def test_write_lmp(async_sqlite_serializer: AsyncSQLiteSerializer): "initial_global_vars": {"global_var1": "value1"}, "initial_free_vars": {"free_var1": "value2"}, "commit_message": "Initial commit", - "created_at": utc_now().isoformat().replace("+00:00", "Z") + "created_at": utc_now().isoformat().replace("+00:00", "Z"), + "uses": ['used_lmp_1'] } - # uses: List[str] = {"used_lmp_1": {}, "used_lmp_2": {}}, - response = client.post( - "/lmp", - json={ - "lmp": lmp_data, - "uses": [] - } - ) + response = client.client.post("/lmp", json=lmp_data) + + # response = client.write_lmp( + # WriteLMPInput(**lmp_data), + # ) assert response.status_code == 200 - lmp = client.get(f"/lmp/{lmp_data['lmp_id']}") + lmp = client.client.get(f"/lmp/{lmp_data['lmp_id']}") assert lmp.status_code == 200 - # del lmp_data["uses"] + del lmp_data["uses"] # todo. return uses y/n? 
assert lmp.json() == {**lmp_data, "num_invocations": 0} def test_write_invocation(async_sqlite_serializer: AsyncSQLiteSerializer): _app, client, *_ = create_test_app(async_sqlite_serializer) + # Test basic http client functionality + client = client.client # first write an lmp.. lmp_id = uuid4().hex @@ -166,10 +168,9 @@ def test_write_invocation(async_sqlite_serializer: AsyncSQLiteSerializer): "lmp_type": LMPType.LM, "api_params": {"param1": "value1"}, } - response = client.post( - "/lmp", - json={'lmp': lmp_data, 'uses': []} - ) + + response = client.post("/lmp", json=lmp_data) + try: assert response.status_code == 200 except Exception as e: @@ -262,6 +263,8 @@ def test_invocation_json_round_trip(): def test_write_invocation_tool_call(async_sqlite_serializer: AsyncSQLiteSerializer): _app, client, *_ = create_test_app(async_sqlite_serializer) + # Test basic http functionality + client = client.client # first write an lmp.. lmp_id = uuid4().hex @@ -275,7 +278,7 @@ def test_write_invocation_tool_call(async_sqlite_serializer: AsyncSQLiteSerializ } response = client.post( "/lmp", - json={'lmp': lmp_data, 'uses': []} + json=lmp_data ) try: assert response.status_code == 200 @@ -310,6 +313,55 @@ def test_write_invocation_tool_call(async_sqlite_serializer: AsyncSQLiteSerializ print(response.json()) assert response.status_code == 200 +def test_http_client_write_lmp(async_sqlite_serializer: AsyncSQLiteSerializer): + _app, client, *_ = create_test_app(async_sqlite_serializer) + + lmp_data: Dict[str, Any] = { + "lmp_id": uuid4().hex, + "lmp_type": LMPType.LM, + "name": "Test LMP", + "source": "def test_function(): pass", + "dependencies": str(["dep1", "dep2"]), + } + result = client.write_lmp(WriteLMPInput( + lmp_id=lmp_data["lmp_id"], + lmp_type=lmp_data["lmp_type"], + name=lmp_data["name"], + source=lmp_data["source"], + dependencies=lmp_data["dependencies"], + )) + assert result is None + +def test_http_client_write_invocation(async_sqlite_serializer: 
AsyncSQLiteSerializer): + _app, client, *_ = create_test_app(async_sqlite_serializer) + + # Invocation depends on an lmp being written so write one first + lmp_id = uuid4().hex + + client.write_lmp(WriteLMPInput( + lmp_id=lmp_id, + name="Test LMP", + source="def test_function(): pass", + dependencies=str(["dep1", "dep2"]), + lmp_type=LMPType.LM, + )) + + invocation_id = uuid4().hex + result = client.write_invocation(WriteInvocationInput( + invocation=Invocation( + id=invocation_id, + lmp_id=lmp_id, + contents=InvocationContents( + invocation_id=invocation_id, + results=[Message(role='user', content="hello")] + ), + created_at=utc_now(), + latency_ms=42.0, + ), + consumes=[] + )) + assert result is None + if __name__ == "__main__": pytest.main() From a8e2417226f7b48b9de00281adaca46f6dffade2 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Wed, 20 Nov 2024 06:13:28 -0800 Subject: [PATCH 39/40] use stores.models --- src/ell/serialize/sql.py | 2 +- tests/api/test_api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ell/serialize/sql.py b/src/ell/serialize/sql.py index f3224d054..0952fd5da 100644 --- a/src/ell/serialize/sql.py +++ b/src/ell/serialize/sql.py @@ -1,7 +1,7 @@ from typing import List, Optional, Dict, Any from ell.stores.store import Store -from ell.stores.studio import Invocation, SerializedLMP +from ell.stores.models import Invocation, SerializedLMP from ell.types.serialize import LMP, WriteLMPInput, WriteInvocationInput from ell.serialize.protocol import EllSerializer, EllAsyncSerializer diff --git a/tests/api/test_api.py b/tests/api/test_api.py index a4d3e8716..769da2af1 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -17,7 +17,7 @@ from ell.api.logger import setup_logging from ell.types import ToolCall from ell.types.serialize import WriteInvocationInput, utc_now, Invocation, InvocationContents -from ell.stores.studio import SerializedLMP +from ell.stores.models import SerializedLMP from ell.types.lmp import 
LMPType from ell.types.serialize import WriteLMPInput From b2b416a7753bbea5130a4456c8057d7059e58862 Mon Sep 17 00:00:00 2001 From: Alex Dixon Date: Wed, 20 Nov 2024 06:25:00 -0800 Subject: [PATCH 40/40] allow dburl none if engine provided --- src/ell/stores/sql.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ell/stores/sql.py b/src/ell/stores/sql.py index f3913f201..847f7473e 100644 --- a/src/ell/stores/sql.py +++ b/src/ell/stores/sql.py @@ -33,8 +33,7 @@ logger = logging.getLogger(__name__) class SQLStore(ell.stores.store.Store): - def __init__(self, db_uri: str, blob_store: Optional[ell.stores.store.BlobStore] = None, - engine: Optional[Engine] = None, ): + def __init__(self, db_uri: str = None, blob_store: Optional[ell.stores.store.BlobStore] = None, engine: Optional[Engine] = None): if engine is not None: self.engine = engine elif db_uri is None: