[Rllib] new API stack crashes when using Repeated space

What happened + What you expected to happen

I'm attempting to upgrade our code framework that uses RLLIB and we currently depend on the repeated space object your team has implemented, attempting to use them on 2.44.1 results in a crash

As a Side item is there any documentation support in rllib to support custom gymnasium spaces or even just the entire "core gymnasium" spaces? I don't think there is any way for me to work around this error currently. I would love the capability to implement custom spaces

error message:

(SingleAgentEnvRunner pid=452302)     return func(self, *args, **kwargs)
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/rllib/execution/rollout_ops.py", line 110, in <lambda>
(SingleAgentEnvRunner pid=452302)     else (lambda w: (w.sample(**random_action_kwargs), w.get_metrics()))
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/util/tracing/tracing_helper.py", line 463, in _resume_span
(SingleAgentEnvRunner pid=452302)     return method(self, *_args, **_kwargs)
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/rllib/env/single_agent_env_runner.py", line 206, in sample
(SingleAgentEnvRunner pid=452302)     samples = self._sample(
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/util/tracing/tracing_helper.py", line 463, in _resume_span
(SingleAgentEnvRunner pid=452302)     return method(self, *_args, **_kwargs)
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/rllib/env/single_agent_env_runner.py", line 261, in _sample
(SingleAgentEnvRunner pid=452302)     self._reset_envs(episodes, shared_data, explore)
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/util/tracing/tracing_helper.py", line 463, in _resume_span
(SingleAgentEnvRunner pid=452302)     return method(self, *_args, **_kwargs)
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/rllib/env/single_agent_env_runner.py", line 729, in _reset_envs
(SingleAgentEnvRunner pid=452302)     episodes[env_index].add_env_reset(
(SingleAgentEnvRunner pid=452302)   File "/home/clong/.cache/pypoetry/virtualenvs/corl-QTKLdzdH-py3.10/lib/python3.10/site-packages/ray/rllib/env/single_agent_episode.py", line 376, in add_env_reset
(SingleAgentEnvRunner pid=452302)     assert self.observation_space.contains(observation), (
(SingleAgentEnvRunner pid=452302) AssertionError: `observation` {'angular-pos': {'some_random_stuff': 1, 'value': 0.037983317}, 'test': ([-0.41928768, 1.0369213],), 'velocs': (array([0.04648657], dtype=float32), 0.0046693585), 'x-pos': array([-0.01215742], dtype=float32)} does NOT fit SingleAgentEpisode's observation_space: Dict('angular-pos': Dict('some_random_stuff': Discrete(3), 'value': Box(-0.41887903, 0.41887903, (), float32)), 'test': Repeated(Box(-inf, inf, (1,), float32), 2), 'velocs': Tuple(Box(-inf, inf, (1,), float32), Box(-inf, inf, (), float32)), 'x-pos': Box(-4.8, 4.8, (1,), float32))!

Versions / Dependencies

ray 2.44.1
torch 2.6
numpy 1.26.1

Reproduction script

(grabed from your unit tests, use --enable-new-api-stack and you will get the fail, the old stack works fine

from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
from ray.rllib.utils.test_utils import (
    add_rllib_example_script_args,
    run_rllib_example_script_experiment,
)
from ray.tune.registry import get_trainable_cls, register_env

parser = add_rllib_example_script_args(
    default_iters=200,
    default_timesteps=100000,
    default_reward=600.0,
)
# TODO (sven): This arg is currently ignored (hard-set to 2).
# parser.add_argument("--num-policies", type=int, default=2)

import gymnasium as gym
from gymnasium.envs.classic_control.cartpole import CartPoleEnv
import numpy as np
from gymnasium.spaces.sequence import Sequence
from ray.rllib.utils.spaces.repeated import Repeated


class CartPoleWithDictObservationSpace(CartPoleEnv):
    """CartPole gym environment that has a dict observation space.

    However, otherwise, the information content in each observation remains the same.

    https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/classic_control/cartpole.py  # noqa

    The new observation space looks as follows (a little quirky, but this is
    for testing purposes only):

    gym.spaces.Dict({
        "x-pos": [x-pos],
        "angular-pos": gym.spaces.Dict({"test": [angular-pos]}),
        "velocs": gym.spaces.Tuple([x-veloc, angular-veloc]),
    })
    """

    def __init__(self, config=None):
        super().__init__()

        # Fix our observation-space as described above.
        low = self.observation_space.low
        high = self.observation_space.high

        # Test as many quirks and oddities as possible: Dict, Dict inside a Dict,
        # Tuple inside a Dict, and both (1,)-shapes as well as ()-shapes for Boxes.
        # Also add a random discrete variable here.
        
        self.observation_space = gym.spaces.Dict(
            {
                "x-pos": gym.spaces.Box(low[0], high[0], (1,), dtype=np.float32),
                "angular-pos": gym.spaces.Dict(
                    {
                        "value": gym.spaces.Box(low[2], high[2], (), dtype=np.float32),
                        # Add some random non-essential information.
                        "some_random_stuff": gym.spaces.Discrete(3),
                    }
                ),
                "velocs": gym.spaces.Tuple(
                    [
                        # x-veloc
                        gym.spaces.Box(low[1], high[1], (1,), dtype=np.float32),
                        # angular-veloc
                        gym.spaces.Box(low[3], high[3], (), dtype=np.float32),
                    ]
                ),
                "test": Repeated(gym.spaces.Box(low[1], high[1], (1,), dtype=np.float32), max_len=2),
                # "test2": Sequence(gym.spaces.Discrete(5))
            }
        )

    def step(self, action):
        next_obs, reward, done, truncated, info = super().step(action)
        return self._compile_current_obs(next_obs), reward, done, truncated, info

    def reset(self, *, seed=None, options=None):
        init_obs, init_info = super().reset(seed=seed, options=options)
        return self._compile_current_obs(init_obs), init_info

    def _compile_current_obs(self, original_cartpole_obs):
        # original_cartpole_obs is [x-pos, x-veloc, angle, angle-veloc]
        return {
            "x-pos": np.array([original_cartpole_obs[0]], np.float32),
            "angular-pos": {
                "value": np.array(original_cartpole_obs[2]),
                "some_random_stuff": np.random.randint(3),
            },
            "velocs": (
                np.array([original_cartpole_obs[1]], np.float32),
                np.array(original_cartpole_obs[3], np.float32),
            ),
            "test": self.observation_space["test"].sample(),
            # "test2": self.observation_space["test2"].sample()
        }


register_env(
            "env",
            lambda _: CartPoleWithDictObservationSpace(config=None),
        )
from ray.rllib.connectors.env_to_module import FlattenObservations
def _env_to_module_pipeline(env):
    return FlattenObservations(multi_agent=False)

if __name__ == "__main__":
    args = parser.parse_args()
        

    base_config = (
        get_trainable_cls(args.algo)
        .get_default_config()
        .environment("env")
        .env_runners(
            num_env_runners=10,
            env_to_module_connector=_env_to_module_pipeline,
            num_envs_per_env_runner=1
        )
        # .training(
        #     train_batch_size_per_learner=40000,
        #     minibatch_size=4000,
        # )
    )

    run_rllib_example_script_experiment(base_config, args)

Issue Severity

High: It blocks me from completing my task.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

What happened + What you expected to happen

Versions / Dependencies

Reproduction script

Issue Severity

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Description

What happened + What you expected to happen

Versions / Dependencies

Reproduction script

Issue Severity

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions