RobotecAI · jmatejcz · Jun 20, 2025 · Jun 20, 2025 · Jun 20, 2025
diff --git a/src/rai_bench/rai_bench/tool_calling_agent/predefined/spatial_reasoning_tasks.py b/src/rai_bench/rai_bench/tool_calling_agent/predefined/spatial_reasoning_tasks.py
@@ -32,66 +32,6 @@
 )
 
 IMG_PATH = "src/rai_bench/rai_bench/tool_calling_agent/predefined/images/"
-true_response_inputs: List[BoolImageTaskInput] = [
-    BoolImageTaskInput(
-        question="Is the door on the left from the desk?",
-        images_paths=[IMG_PATH + "image_1.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Is the light on in the room?",
-        images_paths=[IMG_PATH + "image_2.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Do you see the plant?",
-        images_paths=[IMG_PATH + "image_2.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Are there any pictures on the wall?",
-        images_paths=[IMG_PATH + "image_3.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Are there 3 pictures on the wall?",
-        images_paths=[IMG_PATH + "image_4.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Is there a plant behind the rack?",
-        images_paths=[IMG_PATH + "image_5.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Is there a pillow on the armchain?",
-        images_paths=[IMG_PATH + "image_7.jpg"],
-    ),
-]
-false_response_inputs: List[BoolImageTaskInput] = [
-    BoolImageTaskInput(
-        question="Is the door open?",
-        images_paths=[IMG_PATH + "image_1.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Is someone in the room?",
-        images_paths=[IMG_PATH + "image_1.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Do you see the plant?",
-        images_paths=[IMG_PATH + "image_3.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Are there 4 pictures on the wall?",
-        images_paths=[IMG_PATH + "image_4.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Is there a rack on the left from the sofa?",
-        images_paths=[IMG_PATH + "image_4.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Is there a plant on the right from the window?",
-        images_paths=[IMG_PATH + "image_6.jpg"],
-    ),
-    BoolImageTaskInput(
-        question="Is there a red pillow on the armchair?",
-        images_paths=[IMG_PATH + "image_7.jpg"],
-    ),
-]
 ########## SUBTASKS #################################################################
 return_true_subtask = CheckArgsToolCallSubTask(
     expected_tool_name="return_bool_response", expected_args={"response": True}
@@ -127,8 +67,8 @@ def get_spatial_tasks(
     easy_true_inputs = [
         # Single object presence/detection
         BoolImageTaskInput(
-            question="Is the light on in the room?",
-            images_paths=[IMG_PATH + "image_2.jpg"],
+            question="Is the chair in the room?",
+            images_paths=[IMG_PATH + "image_1.jpg"],
         ),
         BoolImageTaskInput(
             question="Do you see the plant?", images_paths=[IMG_PATH + "image_2.jpg"]
@@ -138,8 +78,8 @@ def get_spatial_tasks(
             images_paths=[IMG_PATH + "image_3.jpg"],
         ),
         BoolImageTaskInput(
-            question="Is there a pillow on the armchain?",
-            images_paths=[IMG_PATH + "image_7.jpg"],
+            question="is there a TV in the room?",
+            images_paths=[IMG_PATH + "image_4.jpg"],
         ),
     ]
 
@@ -149,6 +89,18 @@ def get_spatial_tasks(
             question="Are there 3 pictures on the wall?",
             images_paths=[IMG_PATH + "image_4.jpg"],
         ),
+        BoolImageTaskInput(
+            question="Is the light on in the room?",
+            images_paths=[IMG_PATH + "image_2.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Is the chair blue?",
+            images_paths=[IMG_PATH + "image_3.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Is there something to sit on?",
+            images_paths=[IMG_PATH + "image_7.jpg"],
+        ),
     ]
 
     hard_true_inputs = [
@@ -161,6 +113,14 @@ def get_spatial_tasks(
             question="Is there a plant behind the rack?",
             images_paths=[IMG_PATH + "image_5.jpg"],
         ),
+        BoolImageTaskInput(
+            question="Is there a rug under the bed?",
+            images_paths=[IMG_PATH + "image_2.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Is there a pillow on the armchain?",
+            images_paths=[IMG_PATH + "image_7.jpg"],
+        ),
     ]
 
     easy_false_inputs = [
@@ -175,6 +135,14 @@ def get_spatial_tasks(
             question="Is there a red pillow on the armchair?",
             images_paths=[IMG_PATH + "image_7.jpg"],
         ),
+        BoolImageTaskInput(
+            question="Is there a red desk with chair in the room?",
+            images_paths=[IMG_PATH + "image_5.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Do you see the bed?",
+            images_paths=[IMG_PATH + "image_6.jpg"],
+        ),
     ]
 
     medium_false_inputs = [
@@ -186,6 +154,14 @@ def get_spatial_tasks(
             question="Are there 4 pictures on the wall?",
             images_paths=[IMG_PATH + "image_4.jpg"],
         ),
+        BoolImageTaskInput(
+            question="Is the TV switched on?",
+            images_paths=[IMG_PATH + "image_6.jpg"],
+        ),
+        BoolImageTaskInput(
+            question="Is the window opened?",
+            images_paths=[IMG_PATH + "image_6.jpg"],
+        ),
     ]
 
     hard_false_inputs = [
@@ -198,6 +174,10 @@ def get_spatial_tasks(
             question="Is there a plant on the right from the window?",
             images_paths=[IMG_PATH + "image_6.jpg"],
         ),
+        BoolImageTaskInput(
+            question="Is the chair next to a bed?",
+            images_paths=[IMG_PATH + "image_1.jpg"],
+        ),
     ]
 
     for extra_calls in extra_tool_calls:

diff --git a/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py b/src/rai_bench/rai_bench/tool_calling_agent/tasks/spatial.py
@@ -25,24 +25,24 @@
 
 loggers_type = logging.Logger
 
-SPATIAL_REASONING_SYSTEM_PROMPT_0_SHOT = """You are a helpful and knowledgeable AI assistant that specializes in interpreting and analyzing visual content. Your task is to answer questions based on the images provided to you. Please response with the use of the provided tools."""
+SPATIAL_REASONING_SYSTEM_PROMPT_0_SHOT = """You are a helpful and knowledgeable AI assistant that specializes
+in interpreting and analyzing visual content. Your task is to answer questions based
+on the images provided to you. Please response with the use of the provided tools."""
+# NOTE (jmatejcz): Only one tool is used here, so there is no real difference between the 2-shot and 5-shot prompts;
+# the '2 shot' prompt therefore carries 1 example and the '5 shot' prompt carries 2 examples.
 
 SPATIAL_REASONING_SYSTEM_PROMPT_2_SHOT = (
     SPATIAL_REASONING_SYSTEM_PROMPT_0_SHOT
     + """
 
 Example of tool calls:
-- return_bool_response, args: {'response': True}
-- return_bool_response, args: {'response': False}"""
+- return_bool_response, args: {'response': True}"""
 )
 
-# NOTE (jmatejcz) In this case we are using only one tool so there is no difference bettween 2 and 5 shot
 SPATIAL_REASONING_SYSTEM_PROMPT_5_SHOT = (
     SPATIAL_REASONING_SYSTEM_PROMPT_2_SHOT
     + """
-- return_bool_response, args: {'response': True}  # When object is clearly visible
-- return_bool_response, args: {'response': False}  # When object is not present
-- return_bool_response, args: {'response': True}  # When spatial relationship is correct"""
+- return_bool_response, args: {'response': False}"""
 )