diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml new file mode 100644 index 00000000..645ae5f2 --- /dev/null +++ b/.github/workflows/build-wheel.yml @@ -0,0 +1,41 @@ +# Builds the memu-py wheel for linux-x86_64 on every push to main. +# The uploaded artifact ("memu-wheel") can be consumed cross-repo via +# actions/download-artifact@v7 with `repository` + `github-token` params, +# or via the GitHub REST API: GET /repos/{owner}/{repo}/actions/artifacts?name=memu-wheel + +name: build-wheel + +on: + push: + branches: + - main + +jobs: + build-wheel: + name: build linux-x86_64 wheel + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + python-version: "3.13" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Install maturin + run: uv tool install maturin + + - name: Build wheel + run: uvx maturin build --release --out dist --compatibility manylinux_2_39 + + - name: Upload wheel artifact + uses: actions/upload-artifact@v7 + with: + name: memu-wheel + path: dist/*.whl + retention-days: 7 + if-no-files-found: error diff --git a/src/memu/llm/backends/openai.py b/src/memu/llm/backends/openai.py index aef24fc6..b9a33bf2 100644 --- a/src/memu/llm/backends/openai.py +++ b/src/memu/llm/backends/openai.py @@ -15,15 +15,17 @@ def build_summary_payload( self, *, text: str, system_prompt: str | None, chat_model: str, max_tokens: int | None ) -> dict[str, Any]: prompt = system_prompt or "Summarize the text in one short paragraph." - return { + payload: dict[str, Any] = { "model": chat_model, "messages": [ {"role": "system", "content": prompt}, {"role": "user", "content": text}, ], "temperature": 0.2, - "max_tokens": max_tokens, } + if max_tokens is not None: + payload["max_tokens"] = max_tokens + return payload def parse_summary_response(self, data: dict[str, Any]) -> str: return cast(str, data["choices"][0]["message"]["content"]) @@ -56,9 +58,11 @@ def build_vision_payload( ], }) - return { + payload: dict[str, Any] = { "model": chat_model, "messages": messages, "temperature": 0.2, - "max_tokens": max_tokens, } + if max_tokens is not None: + payload["max_tokens"] = max_tokens + return payload diff --git a/src/memu/llm/openai_sdk.py b/src/memu/llm/openai_sdk.py index 38c6c8bb..08dae608 100644 --- a/src/memu/llm/openai_sdk.py +++ b/src/memu/llm/openai_sdk.py @@ -53,12 +53,15 @@ async def chat( user_message: ChatCompletionUserMessageParam = {"role": "user", "content": prompt} messages.append(user_message) - response = await self.client.chat.completions.create( - model=self.chat_model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - ) + kwargs: dict[str, Any] = { + "model": self.chat_model, + "messages": messages, + "temperature": temperature, + } + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + + response = await self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content logger.debug("OpenAI chat response: %s", response) return content or "", response @@ -76,12 +79,15 @@ async def summarize( user_message: ChatCompletionUserMessageParam = {"role": "user", "content": text} messages: list[ChatCompletionMessageParam] = [system_message, user_message] - response = await self.client.chat.completions.create( - model=self.chat_model, - messages=messages, - temperature=1, - max_tokens=max_tokens, - ) + kwargs: dict[str, Any] = { + "model": self.chat_model, + "messages": messages, + "temperature": 1, + } + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + + response = await self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content logger.debug("OpenAI summarize response: %s", response) return content or "", response @@ -142,12 +148,15 @@ async def vision( } messages.append(user_message) - response = await self.client.chat.completions.create( - model=self.chat_model, - messages=messages, - temperature=1, - max_tokens=max_tokens, - ) + kwargs: dict[str, Any] = { + "model": self.chat_model, + "messages": messages, + "temperature": 1, + } + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + + response = await self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content logger.debug("OpenAI vision response: %s", response) return content or "", response