feat: tools structured response by kausmeows · Pull Request #3165 · agno-agi/agno · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

feat: tools structured response #3165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cookbook/tools/dalle_tools_structured_response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

"""Run `pip install openai` to install dependencies."""

from pathlib import Path

from agno.agent import Agent
from agno.tools.dalle import DalleTools
from agno.utils.media import download_image

# Build an agent whose only tool is the DALL-E toolkit.
agent = Agent(name="DALL-E Image Generator", tools=[DalleTools()])

# Example 1: generate an image with the default settings and, in the same
# turn, ask a question about the picture that was just produced.
prompt = "Generate an image of a white furry cat sitting on a couch. What is the color of the cat?"
agent.print_response(prompt, markdown=True)

# Optional follow-up turn, left disabled:
# agent.print_response("What is the color of the cat?", markdown=True)
62 changes: 59 additions & 3 deletions libs/agno/agno/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from pydantic import BaseModel

from agno.exceptions import AgentRunException
from agno.media import AudioResponse, ImageArtifact
from agno.media import AudioResponse, Image, ImageArtifact
from agno.models.message import Citations, Message, MessageMetrics
from agno.models.response import ModelResponse, ModelResponseEvent
from agno.tools.function import Function, FunctionCall
from agno.tools.function import Function, FunctionCall, FunctionCallResult
from agno.utils.log import log_debug, log_error, log_warning
from agno.utils.timer import Timer
from agno.utils.tools import get_function_call_for_tool_call
Expand Down Expand Up @@ -863,7 +863,7 @@ def _create_function_call_result(
self, fc: FunctionCall, success: bool, output: Optional[Union[List[Any], str]], timer: Timer
) -> Message:
"""Create a function call result message."""
return Message(
message = Message(
role=self.tool_message_role,
content=output if success else fc.error,
tool_call_id=fc.call_id,
Expand All @@ -874,6 +874,33 @@ def _create_function_call_result(
metrics=MessageMetrics(time=timer.elapsed),
)

# Return the message
return message

# NOTE: attaching the images to the tool message (as in the commented-out variant below) fails with: agno.exceptions.ModelProviderError: Invalid 'messages[4]'. Image URLs are only allowed for messages with role 'user', but this message with role 'tool' contains an image URL.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll have to create a separate message for "user" with the image, as if the user sent an image after the tool result. Still not sure whether it would work, but worth a try.

# def _create_function_call_result(
# self, fc: FunctionCall, success: bool, output: Optional[Union[List[Any], str]], timer: Timer
# ) -> Message:
# """Create a function call result message."""
# images = None

# if isinstance(fc.result, FunctionCallResult):
# # Convert ImageArtifact to Image objects using the from_artifact method
# if fc.result.images:
# images = [Image.from_artifact(img) for img in fc.result.images]

# return Message(
# role=self.tool_message_role,
# content=output if success else fc.error,
# tool_call_id=fc.call_id,
# tool_name=fc.function.name,
# tool_args=fc.arguments,
# tool_call_error=not success,
# stop_after_tool_call=fc.function.stop_after_tool_call,
# metrics=MessageMetrics(time=timer.elapsed),
# images=images
# )

def run_function_calls(
self,
function_calls: List[FunctionCall],
Expand Down Expand Up @@ -924,6 +951,33 @@ def run_function_calls(
# Process function call output
function_call_output: str = ""

# if isinstance(fc.result, FunctionCallResult):
# function_call_output = fc.result.content
# if fc.result.images:
# # Convert ImageArtifact to proper format for ModelResponse
# yield ModelResponse(
# content=function_call_output,
# images=[ImageArtifact(
# id=img.id,
# url=img.url,
# original_prompt=img.original_prompt,
# revised_prompt=img.revised_prompt
# ) for img in fc.result.images]
# )
if isinstance(fc.result, FunctionCallResult):
function_call_output = fc.result.content
if fc.result.images:
# Yield a separate ModelResponse with the images
yield ModelResponse(
content=function_call_output,
images=fc.result.images, # Keep the original ImageArtifact objects
)
else:
yield ModelResponse(content=function_call_output)
else:
function_call_output = str(fc.result)
yield ModelResponse(content=function_call_output)

if isinstance(fc.result, (GeneratorType, collections.abc.Iterator)):
for item in fc.result:
function_call_output += str(item)
Expand All @@ -938,6 +992,8 @@ def run_function_calls(
function_call_result = self._create_function_call_result(
fc, success=function_call_success, output=function_call_output, timer=function_call_timer
)

print("--> function_call_result", function_call_result)
yield ModelResponse(
content=f"{fc.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
tool_calls=[function_call_result.to_function_call_dict()],
Expand Down
1 change: 1 addition & 0 deletions libs/agno/agno/models/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class ModelResponse:
parsed: Optional[Any] = None
audio: Optional[AudioResponse] = None
image: Optional[ImageArtifact] = None
images: Optional[List[ImageArtifact]] = None
tool_calls: List[Dict[str, Any]] = field(default_factory=list)
event: str = ModelResponseEvent.assistant_response.value

Expand Down
31 changes: 15 additions & 16 deletions libs/agno/agno/tools/dalle.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from agno.media import ImageArtifact
from agno.team.team import Team
from agno.tools import Toolkit
from agno.tools.function import FunctionCallResult
from agno.utils.log import log_debug, logger

try:
Expand Down Expand Up @@ -58,17 +59,9 @@ def __init__(
# - Add support for saving images
# - Add support for editing images

def create_image(self, agent: Union[Agent, Team], prompt: str) -> str:
"""Use this function to generate an image for a prompt.

Args:
prompt (str): A text description of the desired image.

Returns:
str: str: A message indicating if the image has been generated successfully or an error message.
"""
def create_image(self, agent: Union[Agent, Team], prompt: str) -> FunctionCallResult:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You still need the docstring; the model relies on it.

if not self.api_key:
return "Please set the OPENAI_API_KEY"
return FunctionCallResult(content="Please set the OPENAI_API_KEY")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't always have to use FunctionCallResult; strings and other types should still be allowed.


try:
client = OpenAI(api_key=self.api_key)
Expand All @@ -83,18 +76,24 @@ def create_image(self, agent: Union[Agent, Team], prompt: str) -> str:
)
log_debug("Image generated successfully")

generated_images = []

# Update the run response with the image URLs
response_str = ""
if response.data:
for img in response.data:
if img.url:
agent.add_image(
ImageArtifact(
id=str(uuid4()), url=img.url, original_prompt=prompt, revised_prompt=img.revised_prompt
)
image_artifact = ImageArtifact(
id=str(uuid4()), url=img.url, original_prompt=prompt, revised_prompt=img.revised_prompt
)
agent.add_image(image_artifact)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally we won't do this anymore and will instead handle it automatically in the model/agent. But yeah, for simplicity let's keep it here for now. We can clean it up in Agno 2.0.

response_str += f"Image has been generated at the URL {img.url}\n"
return response_str or "No images were generated"
generated_images.append(image_artifact)

# Create a more descriptive response that includes details about the image
content = f"The image shows exactly what was requested - {prompt}. "

return FunctionCallResult(content=content, images=generated_images if generated_images else None)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is the winner

except Exception as e:
logger.error(f"Failed to generate image: {e}")
return f"Error: {e}"
return FunctionCallResult(content=f"Error: {e}", images=None)
7 changes: 7 additions & 0 deletions libs/agno/agno/tools/function.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from dataclasses import dataclass
from functools import partial
from typing import Any, Callable, Dict, List, Optional, TypeVar, get_type_hints

from docstring_parser import parse
from pydantic import BaseModel, Field, validate_call

from agno.exceptions import AgentRunException
from agno.media import AudioArtifact, ImageArtifact, VideoArtifact
from agno.utils.log import log_debug, log_error, log_exception, log_warning

T = TypeVar("T")
Expand Down Expand Up @@ -740,3 +742,8 @@ async def aexecute(self) -> bool:
self._handle_post_hook()

return function_call_success


class FunctionCallResult(BaseModel):
    """Structured return value for a tool function.

    Lets a tool hand back image artifacts alongside its regular output
    instead of returning only a bare value.
    """

    # Primary output of the tool call; annotated as Any, so no type is enforced.
    content: Any
    # Image artifacts produced by the call, or None when there are none.
    images: Optional[List[ImageArtifact]] = Field(default=None)
Loading
0