deepset-ai · YassinNouh21 · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025
@@ -371,8 +371,8 @@ def _create_dataclass_schema(self, python_type: Any, description: str) -> Dict[s
         cls = python_type if isinstance(python_type, type) else python_type.__class__
         for field in fields(cls):
             field_description = f"Field '{field.name}' of '{cls.__name__}'."
-            if isinstance(schema["properties"], dict):
-                schema["properties"][field.name] = self._create_property_schema(field.type, field_description)
+            field_schema = self._create_property_schema(field.type, field_description)
+            schema["properties"][field.name] = field_schema
         return schema
 
     @staticmethod
@@ -384,8 +384,43 @@ def _create_basic_type_schema(python_type: Any, description: str) -> Dict[str, A
         :param description: The description of the type.
         :returns: A dictionary representing the basic type schema.
         """
-        type_mapping = {str: "string", int: "integer", float: "number", bool: "boolean", dict: "object"}
-        return {"type": type_mapping.get(python_type, "string"), "description": description}
+        type_mapping = {str: "string", int: "integer", float: "number", bool: "boolean"}
+        schema = {"type": type_mapping.get(python_type, "string"), "description": description}
+        return schema

+    def _create_union_schema(self, types: tuple, description: str) -> Dict[str, Any]:
+        """
+        Creates a schema for a Union type.
+
+        `None` members are skipped. A dict member becomes a free-form object, a list member becomes an
+        array whose items are described recursively, and every other member is delegated to
+        `_create_property_schema`. Member schemas carry no description of their own; the description is
+        attached once, at the top level of the returned schema.
+
+        :param types: The types in the Union.
+        :param description: The description of the Union.
+        :returns: A dictionary representing the Union schema: the member's own schema when there is a single
+            non-None member, a `oneOf` schema when there are several, and a plain string schema when there
+            are none.
+        """
+
+        def describe_member(member_type: Any) -> Dict[str, Any]:
+            # Dicts (bare or parameterized) are exposed as free-form objects.
+            if member_type is dict or get_origin(member_type) is dict:
+                return {"type": "object", "additionalProperties": True}
+            member_schema = self._create_property_schema(member_type, "")
+            # Only the Union as a whole is described, not its individual members.
+            member_schema.pop("description", None)
+            return member_schema
+
+        schemas = []
+        for arg_type in types:
+            if arg_type is type(None):
+                continue
+            if get_origin(arg_type) is list:
+                item_args = get_args(arg_type)
+                item_type = item_args[0] if item_args else Any
+                schemas.append({"type": "array", "items": describe_member(item_type)})
+            else:
+                schemas.append(describe_member(arg_type))
+
+        if not schemas:
+            # An empty `oneOf` is not valid JSON Schema; fall back to a string, the same default that is
+            # used elsewhere in this class when no concrete type is available.
+            return {"type": "string", "description": description}
+
+        if len(schemas) == 1:
+            schema = schemas[0]
+            schema["description"] = description
+        else:
+            schema = {"oneOf": schemas, "description": description}
+        return schema
 
     def _create_property_schema(self, python_type: Any, description: str, default: Any = None) -> Dict[str, Any]:
         """
@@ -403,15 +438,39 @@ def _create_property_schema(self, python_type: Any, description: str, default: A
             python_type = non_none_types[0] if non_none_types else str
 
         origin = get_origin(python_type)
-        if origin is list:
-            schema = self._create_list_schema(get_args(python_type)[0] if get_args(python_type) else Any, description)
+        args = get_args(python_type)
+
+        # Handle Dict[str, Any] as a special case for meta fields
+        if origin is dict and args and args[0] is str and args[1] is Any:
+            if description and "meta" in description.lower():
+                schema = {"type": "string", "description": description}
+            else:
+                schema = {"type": "object", "description": description, "additionalProperties": True}
+        # Handle other dict types
+        elif python_type is dict or (origin is dict):
+            schema = {"type": "object", "description": description, "additionalProperties": True}
+        # Handle list
+        elif origin is list:
+            item_type = args[0] if args else Any
+            # Special case: list of dicts
+            if item_type is dict or (get_origin(item_type) is dict):
+                items_schema = {"type": "object", "additionalProperties": True}
+            else:
+                items_schema = self._create_property_schema(item_type, "")
+                items_schema.pop("description", None)
+            schema = {"type": "array", "description": description, "items": items_schema}
+        # Handle dataclass
         elif is_dataclass(python_type):
             schema = self._create_dataclass_schema(python_type, description)
+        # Handle Pydantic v2 models (unsupported)
         elif hasattr(python_type, "model_validate"):
             raise SchemaGenerationError(
                 f"Pydantic models (e.g. {python_type.__name__}) are not supported as input types for "
                 f"component's run method."
             )
+        # Handle Union (including Optional)
+        elif origin is Union:
+            schema = self._create_union_schema(args, description)
         else:
             schema = self._create_basic_type_schema(python_type, description)
 

@@ -6,7 +6,7 @@
 import os
 from copy import deepcopy
 from dataclasses import dataclass
-from typing import Dict, List
+from typing import Dict, List, Union, Optional, Any
 
 import pytest
 
@@ -122,6 +122,26 @@ def run(self, documents: List[Document], top_k: int = 5) -> Dict[str, str]:
         return {"concatenated": "\n".join(doc.content for doc in documents[:top_k])}
 
 
+@component
+class ComplexTypeProcessor:
+    """A component that processes complex types."""
+
+    # NOTE(review): `test_from_component_with_complex_types` asserts that the schema description of `meta`
+    # starts with "Optional metadata", which matches the `:param meta:` text below - keep that wording stable.
+    @component.output_types(result=str)
+    def run(
+        self, meta: Union[Dict[str, Any], List[Dict[str, Any]]] = None, extraction_kwargs: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, str]:
+        """
+        Processes complex types like dictionaries and unions.
+
+        :param meta: Optional metadata to attach, can be a dictionary or list of dictionaries
+        :param extraction_kwargs: Optional dictionary containing keyword arguments to customize the extraction process
+        :return: A dictionary with the result
+        """
+        # Falsy inputs (None, empty dict, empty list) are reported with a placeholder text.
+        meta_text = "No metadata" if not meta else str(meta)
+        kwargs_text = "No kwargs" if not extraction_kwargs else str(extraction_kwargs)
+        summary = f"Meta: {meta_text}, Kwargs: {kwargs_text}"
+        return {"result": summary}
+
+
 def output_handler(old, new):
     """
     Output handler to test serialization.
@@ -335,6 +355,39 @@ def foo(self, text: str):
         with pytest.raises(ValueError):
             ComponentTool(component=not_a_component, name="invalid_tool", description="This should fail")
 
+    def test_from_component_with_complex_types(self):
+        """Checks the schema generated for Union/Dict parameters and invokes the tool with both shapes."""
+        tool = ComponentTool(component=ComplexTypeProcessor())
+        properties = tool.parameters["properties"]
+
+        # Both parameters of run() must show up in the parameter schema
+        for param_name in ("meta", "extraction_kwargs"):
+            assert param_name in properties
+
+        # meta is a Union of a dict and a list of dicts, so it is expected as oneOf
+        meta_schema = properties["meta"]
+        assert meta_schema["description"].startswith("Optional metadata")
+        assert "oneOf" in meta_schema
+
+        # extraction_kwargs is a plain dict, so it is expected as a free-form object
+        kwargs_schema = properties["extraction_kwargs"]
+        assert kwargs_schema["type"] == "object"
+        assert "additionalProperties" in kwargs_schema
+        assert kwargs_schema["additionalProperties"] is True
+
+        # Invocation with a dict for meta
+        dict_result = tool.invoke(meta={"source": "web"}, extraction_kwargs={"timeframe": "last month"})
+        assert isinstance(dict_result, dict)
+        assert "result" in dict_result
+        for expected in ("Meta: {'source': 'web'}", "Kwargs: {'timeframe': 'last month'}"):
+            assert expected in dict_result["result"]
+
+        # Invocation with a list of dicts for meta
+        list_result = tool.invoke(meta=[{"id": 1}, {"id": 2}])
+        assert isinstance(list_result, dict)
+        assert "result" in list_result
+        assert "Meta: [{'id': 1}, {'id': 2}]" in list_result["result"]
+
 
 ## Integration tests
 class TestToolComponentInPipelineWithOpenAI: