diff --git a/apps/llm/reasoning.py b/apps/llm/reasoning.py
index 79768783d258fff4eb02a4fd5809e1110186fa33..fddc84db89d98bb8c29d96b4dee71aeed49804ea 100644
--- a/apps/llm/reasoning.py
+++ b/apps/llm/reasoning.py
@@ -142,7 +142,7 @@ class ReasoningLLM:
         return await self._client.chat.completions.create(
             model=model,
             messages=messages, # type: ignore[]
-            max_tokens=max_tokens or self._config.max_tokens,
+            max_completion_tokens=max_tokens or self._config.max_tokens,
             temperature=temperature or self._config.temperature,
             stream=True,
             stream_options={"include_usage": True},