Bases: LLM
A simple wrapper around OpenAI-like LLMs that indicates vLLM usage in `extractors.extract_from_text`.
Source code in src/kibad_llm/llms/openai_like_vllm.py
class OpenAILikeVllm(LLM):
    """Wrapper around OpenAI-like LLMs that marks vLLM usage for extractors.extract_from_text."""

    def __init__(self, *args, **kwargs) -> None:
        # Delegate construction entirely to the wrapped OpenAILike client.
        self.model = OpenAILike(*args, **kwargs)

    def call_llm_chat_with_guided_decoding(
        self,
        messages: list[SimpleChatMessage],
        *,
        json_schema: dict[str, Any] | None = None,
        **request_kwargs,
    ) -> ChatResponse:
        """Send a chat request, optionally constraining output to *json_schema*.

        Raises:
            ValueError: if the server rejects the request (aligned with in_process LLMs).
        """
        if json_schema is not None:
            # vllm hosted models require json schema guided decoding via extra_body
            extra_body = request_kwargs.setdefault("extra_body", {})
            if "structured_outputs" in extra_body:
                warn_once(
                    f'Overwriting existing "structured_outputs": '
                    f'{request_kwargs["extra_body"]["structured_outputs"]} '
                    'in request_parameters["extra_body"] with provided json schema for '
                    'guided decoding ("structured_outputs": {"json": schema}).'
                )
            extra_body["structured_outputs"] = {"json": json_schema}

        # Convert the project's lightweight message type to llama-index messages.
        converted_messages = [
            LlamaIndexChatMessage(role=message.role, content=message.content)
            for message in messages
        ]
        try:
            return self.model.chat(converted_messages, **request_kwargs)
        except BadRequestError as err:
            # align error type with in_process LLMs
            raise ValueError(err.message) from err

    def get_reasoning_from_chat_response(self, response: ChatResponse) -> str:
        """Extract reasoning from a chat response."""
        raw_message = self.get_raw_message_from_chat_response(response)
        # vLLM: prefer `reasoning`, fallback to legacy `reasoning_content`
        reasoning = getattr(raw_message, "reasoning", None) or getattr(
            raw_message, "reasoning_content", None
        )
        if not isinstance(reasoning, str):
            raise ReasoningExtractionError("Could not extract reasoning from chat response.")
        if not reasoning.strip():
            raise EmptyReasoningError("Extracted reasoning is empty.")
        return reasoning
get_reasoning_from_chat_response(response)
Extract reasoning from a chat response.
Source code in src/kibad_llm/llms/openai_like_vllm.py
def get_reasoning_from_chat_response(self, response: ChatResponse) -> str:
    """Extract reasoning from a chat response."""
    message = self.get_raw_message_from_chat_response(response)
    # vLLM: prefer `reasoning`, fallback to legacy `reasoning_content`
    extracted = getattr(message, "reasoning", None) or getattr(
        message, "reasoning_content", None
    )
    if not isinstance(extracted, str):
        raise ReasoningExtractionError("Could not extract reasoning from chat response.")
    if not extracted.strip():
        raise EmptyReasoningError("Extracted reasoning is empty.")
    return extracted