[RLlib] Upgrade RLlink protocol for external env/simulator training. #53550

Status: Open. sven1977 wants to merge 14 commits into master.
2 changes: 1 addition & 1 deletion doc/source/rllib/external-envs.rst
@@ -30,7 +30,7 @@ should step.
.. scale: 75 %
.. A Unity3D soccer game being learnt by RLlib via the ExternalEnv API.

RLlib provides an `external messaging protocol <https://github.com/ray-project/ray/blob/master/rllib/env/utils/external_env_protocol.py>`__
RLlib provides an `external messaging protocol <https://github.com/ray-project/ray/blob/master/rllib/env/external/rllink.py>`__
called :ref:`RLlink <rllink-protocol-docs>` for this purpose as well as the option to customize your :py:class:`~ray.rllib.env.env_runner.EnvRunner` class
toward communicating through :ref:`RLlink <rllink-protocol-docs>` with one or more clients.
An example, `tcp-based EnvRunner implementation with RLlink is available here <https://github.com/ray-project/ray/blob/master/rllib/examples/envs/env_connecting_to_rllib_w_tcp_client.py>`__.
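Note on the doc change above: the linked example connects an external simulator to an RLlib EnvRunner over plain TCP. Below is a minimal client-side sketch for orientation only. The framing (8-character decimal length prefix plus a UTF-8 JSON body) and the PING/PONG handshake are illustrative assumptions, not the authoritative wire format; that is defined in rllib/env/external/rllink.py.

import json
import socket


def send_msg(sock: socket.socket, msg: dict) -> None:
    # Frame the message as UTF-8 JSON behind a fixed 8-char length prefix
    # (assumed framing; see rllink.py for the actual protocol).
    body = json.dumps(msg).encode("utf-8")
    sock.sendall(f"{len(body):08d}".encode("utf-8") + body)


def _recv_exactly(sock: socket.socket, n: int) -> bytes:
    # Read exactly `n` bytes, looping over partial TCP reads.
    buf = b""
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise ConnectionError("Socket closed mid-message.")
        buf += chunk
    return buf


def recv_msg(sock: socket.socket) -> dict:
    length = int(_recv_exactly(sock, 8).decode("utf-8"))
    return json.loads(_recv_exactly(sock, length).decode("utf-8"))


if __name__ == "__main__":
    # Hypothetical handshake against an EnvRunner listening on port 5555.
    with socket.create_connection(("localhost", 5555)) as sock:
        send_msg(sock, {"type": "PING"})
        print(recv_msg(sock))  # Expected reply, e.g. {"type": "PONG"}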
1 change: 1 addition & 0 deletions doc/source/rllib/package_ref/env.rst
@@ -55,4 +55,5 @@ Environment API Reference
env/multi_agent_env.rst
env/multi_agent_env_runner.rst
env/multi_agent_episode.rst
env/external.rst
env/utils.rst
27 changes: 27 additions & 0 deletions doc/source/rllib/package_ref/env/external.rst
@@ -0,0 +1,27 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. _env-external-reference-docs:

External Envs
=============

.. include:: /_includes/rllib/new_api_stack.rst

ray.rllib.env.external.rllink.RLlink
------------------------------------

.. currentmodule:: ray.rllib.env.external.rllink

.. autoclass:: ray.rllib.env.external.rllink.RLlink

.. autosummary::
:nosignatures:

~get_rllink_message
~send_rllink_message


ray.rllib.env.external.rllib_gateway.RLlibGateway
-------------------------------------------------

.. autoclass:: ray.rllib.env.external.rllib_gateway.RLlibGateway
1 change: 0 additions & 1 deletion doc/source/rllib/package_ref/env/utils.rst
@@ -16,6 +16,5 @@ rllib.env.utils
:nosignatures:
:toctree: env/

~external_env_protocol.RLlink
~try_import_open_spiel
~try_import_pyspiel
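Taken together, the package-ref edits above relocate RLlink from rllib.env.utils (old module: rllib/env/utils/external_env_protocol.py) to the new rllib.env.external package. For downstream code, the import change presumably looks as follows (old path inferred from the removed autosummary entry, new path per the added external.rst page and the __init__.py below):

# Before this PR:
# from ray.rllib.env.utils.external_env_protocol import RLlink

# After this PR:
from ray.rllib.env.external import RLlink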
5 changes: 2 additions & 3 deletions rllib/BUILD
@@ -3418,7 +3418,7 @@ py_test(
# ....................................
py_test(
name = "examples/algorithms/appo_custom_algorithm_w_shared_data_actor",
size = "medium",
size = "large",
srcs = ["examples/algorithms/appo_custom_algorithm_w_shared_data_actor.py"],
args = [
"--enable-new-api-stack",
@@ -3456,7 +3456,7 @@ py_test(

py_test(
name = "examples/algorithms/vpg_custom_algorithm",
size = "medium",
size = "large",
srcs = ["examples/algorithms/vpg_custom_algorithm.py"],
args = [
"--enable-new-api-stack",
@@ -5086,7 +5086,6 @@ py_test(
args = [
"--enable-new-api-stack",
"--as-test",
"--framework=torch",
],
# Include the offline data files.
data = ["tests/data/cartpole/cartpole-v1_large"],
52 changes: 36 additions & 16 deletions rllib/algorithms/algorithm_config.py
@@ -324,7 +324,7 @@ def __init__(self, algo_class: Optional[type] = None):
self.num_env_runners = 0
self.create_local_env_runner = True
self.num_envs_per_env_runner = 1
# TODO (sven): Once new ormsgpack system in place, reaplce the string
# TODO (sven): Once new ormsgpack system in place, replace the string
# with proper `gym.envs.registration.VectorizeMode.SYNC`.
self.gym_env_vectorize_mode = "SYNC"
self.num_cpus_per_env_runner = 1
@@ -1056,9 +1056,11 @@ def build_env_to_module_connector(

if env is not None:
obs_space = getattr(env, "single_observation_space", env.observation_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
obs_space = spaces[INPUT_ENV_SINGLE_SPACES][0]
else:
assert self.observation_space is not None
obs_space = self.observation_space
if obs_space is None and self.is_multi_agent:
obs_space = gym.spaces.Dict(
{
@@ -1068,9 +1070,11 @@ def build_env_to_module_connector(
)
if env is not None:
act_space = getattr(env, "single_action_space", env.action_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
act_space = spaces[INPUT_ENV_SINGLE_SPACES][1]
else:
assert self.action_space is not None
act_space = self.action_space
if act_space is None and self.is_multi_agent:
act_space = gym.spaces.Dict(
{
@@ -1160,9 +1164,11 @@ def build_module_to_env_connector(self, env=None, spaces=None) -> ConnectorV2:

if env is not None:
obs_space = getattr(env, "single_observation_space", env.observation_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
obs_space = spaces[INPUT_ENV_SINGLE_SPACES][0]
else:
assert self.observation_space is not None
obs_space = self.observation_space
if obs_space is None and self.is_multi_agent:
obs_space = gym.spaces.Dict(
{
@@ -1172,9 +1178,11 @@ def build_module_to_env_connector(self, env=None, spaces=None) -> ConnectorV2:
)
if env is not None:
act_space = getattr(env, "single_action_space", env.action_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
act_space = spaces[INPUT_ENV_SINGLE_SPACES][1]
else:
assert self.action_space is not None
act_space = self.action_space
if act_space is None and self.is_multi_agent:
act_space = gym.spaces.Dict(
{
@@ -4402,13 +4410,25 @@ def get_rl_module_spec(
)
rl_module_spec = rl_module_spec[DEFAULT_MODULE_ID]

if spaces is not None:
rl_module_spec.observation_space = spaces[DEFAULT_MODULE_ID][0]
rl_module_spec.action_space = spaces[DEFAULT_MODULE_ID][1]
elif env is not None:
if isinstance(env, gym.vector.VectorEnv):
rl_module_spec.observation_space = env.single_observation_space
rl_module_spec.action_space = env.single_action_space
if rl_module_spec.observation_space is None:
Collaborator review comment: Dumb question: Can't we reuse the self.obs_space, self.action_space here?

if spaces is not None:
rl_module_spec.observation_space = spaces[DEFAULT_MODULE_ID][0]
elif env is not None and isinstance(env, gym.Env):
rl_module_spec.observation_space = getattr(
env, "single_observation_space", env.observation_space
)
else:
rl_module_spec.observation_space = self.observation_space

if rl_module_spec.action_space is None:
if spaces is not None:
rl_module_spec.action_space = spaces[DEFAULT_MODULE_ID][1]
elif env is not None and isinstance(env, gym.Env):
rl_module_spec.action_space = getattr(
env, "single_action_space", env.action_space
)
else:
rl_module_spec.action_space = self.action_space

# If module_config_dict is not defined, set to our generic one.
if rl_module_spec.model_config is None:
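The recurring pattern across the three hunks above (build_env_to_module_connector, build_module_to_env_connector, get_rl_module_spec) is a three-way space-resolution fallback: prefer the env's spaces, then the `spaces` dict, then the spaces configured on the AlgorithmConfig itself; the last step is what supports external envs, where no env object lives inside RLlib. A standalone sketch of that order follows; `resolve_space` is a hypothetical helper, and the string value of INPUT_ENV_SINGLE_SPACES is a placeholder, not the real constant.

import gymnasium as gym

# Placeholder for the real constant imported inside algorithm_config.py.
INPUT_ENV_SINGLE_SPACES = "__env_single_spaces__"


def resolve_space(env, spaces, config_space, index):
    # index=0 -> observation space, index=1 -> action space.
    if env is not None:
        # Vector envs expose single_observation_space/single_action_space;
        # fall back to the plain attribute otherwise.
        attr = ("observation_space", "action_space")[index]
        return getattr(env, "single_" + attr, getattr(env, attr))
    elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
        return spaces[INPUT_ENV_SINGLE_SPACES][index]
    else:
        # New in this PR: fall back to the config's own space.
        assert config_space is not None
        return config_space


# With no env and no `spaces` dict, the configured space wins:
box = gym.spaces.Box(-1.0, 1.0, (4,))
print(resolve_space(env=None, spaces=None, config_space=box, index=0))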
14 changes: 14 additions & 0 deletions rllib/env/external/__init__.py
@@ -0,0 +1,14 @@
from ray.rllib.env.external.rllink import (
get_rllink_message,
send_rllink_message,
RLlink,
)
from ray.rllib.env.external.rllib_gateway import RLlibGateway


__all__ = [
"get_rllink_message",
"RLlibGateway",
"RLlink",
"send_rllink_message",
]
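The new package's public surface is exactly the four names in __all__ above, so client code can import everything RLlink-related from one place:

# Everything re-exported by rllib/env/external/__init__.py:
from ray.rllib.env.external import (
    RLlibGateway,
    RLlink,
    get_rllink_message,
    send_rllink_message,
)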