[RLlib] Upgrade RLlink protocol for external env/simulator training. #53550

Status: Open. sven1977 wants to merge 14 commits into master.
2 changes: 1 addition & 1 deletion doc/source/rllib/external-envs.rst
@@ -30,7 +30,7 @@ should step.
.. scale: 75 %
.. A Unity3D soccer game being learnt by RLlib via the ExternalEnv API.

RLlib provides an `external messaging protocol <https://github.com/ray-project/ray/blob/master/rllib/env/utils/external_env_protocol.py>`__
RLlib provides an `external messaging protocol <https://github.com/ray-project/ray/blob/master/rllib/env/external/rllink.py>`__
called :ref:`RLlink <rllink-protocol-docs>` for this purpose as well as the option to customize your :py:class:`~ray.rllib.env.env_runner.EnvRunner` class
toward communicating through :ref:`RLlink <rllink-protocol-docs>` with one or more clients.
An example, `tcp-based EnvRunner implementation with RLlink is available here <https://github.com/ray-project/ray/blob/master/rllib/examples/envs/env_connecting_to_rllib_w_tcp_client.py>`__.
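Note on the doc change above: the linked example connects an external simulator to an RLlib EnvRunner over plain TCP. Below is a minimal client-side sketch for orientation only. The framing (8-character decimal length prefix plus a UTF-8 JSON body) and the PING/PONG handshake are illustrative assumptions, not the authoritative wire format; that is defined in rllib/env/external/rllink.py.

import json
import socket


def send_msg(sock: socket.socket, msg: dict) -> None:
    # Frame the message as UTF-8 JSON behind a fixed 8-char length prefix
    # (assumed framing; see rllink.py for the actual protocol).
    body = json.dumps(msg).encode("utf-8")
    sock.sendall(f"{len(body):08d}".encode("utf-8") + body)


def _recv_exactly(sock: socket.socket, n: int) -> bytes:
    # Read exactly `n` bytes, looping over partial TCP reads.
    buf = b""
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise ConnectionError("Socket closed mid-message.")
        buf += chunk
    return buf


def recv_msg(sock: socket.socket) -> dict:
    length = int(_recv_exactly(sock, 8).decode("utf-8"))
    return json.loads(_recv_exactly(sock, length).decode("utf-8"))


if __name__ == "__main__":
    # Hypothetical handshake against an EnvRunner listening on port 5555.
    with socket.create_connection(("localhost", 5555)) as sock:
        send_msg(sock, {"type": "PING"})
        print(recv_msg(sock))  # Expected reply, e.g. {"type": "PONG"}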
1 change: 1 addition & 0 deletions doc/source/rllib/package_ref/env.rst
@@ -55,4 +55,5 @@ Environment API Reference
env/multi_agent_env.rst
env/multi_agent_env_runner.rst
env/multi_agent_episode.rst
env/external.rst
env/utils.rst
27 changes: 27 additions & 0 deletions doc/source/rllib/package_ref/env/external.rst
@@ -0,0 +1,27 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. _env-external-reference-docs:

External Envs
=============

.. include:: /_includes/rllib/new_api_stack.rst

ray.rllib.env.external.rllink.RLlink
------------------------------------

.. currentmodule:: ray.rllib.env.external.rllink

.. autoclass:: ray.rllib.env.external.rllink.RLlink

.. autosummary::
:nosignatures:

~get_rllink_message
~send_rllink_message


ray.rllib.env.external.rllib_gateway.RLlibGateway
-------------------------------------------------

.. autoclass:: ray.rllib.env.external.rllib_gateway.RLlibGateway
1 change: 0 additions & 1 deletion doc/source/rllib/package_ref/env/utils.rst
@@ -16,6 +16,5 @@ rllib.env.utils
:nosignatures:
:toctree: env/

~external_env_protocol.RLlink
~try_import_open_spiel
~try_import_pyspiel
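Taken together, the package-ref edits above relocate RLlink from rllib.env.utils (old module: rllib/env/utils/external_env_protocol.py) to the new rllib.env.external package. For downstream code, the import change presumably looks as follows (old path inferred from the removed autosummary entry, new path per the added external.rst page and the __init__.py below):

# Before this PR:
# from ray.rllib.env.utils.external_env_protocol import RLlink

# After this PR:
from ray.rllib.env.external import RLlink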
5 changes: 2 additions & 3 deletions rllib/BUILD
@@ -3418,7 +3418,7 @@ py_test(
# ....................................
py_test(
name = "examples/algorithms/appo_custom_algorithm_w_shared_data_actor",
size = "medium",
size = "large",
srcs = ["examples/algorithms/appo_custom_algorithm_w_shared_data_actor.py"],
args = [
"--enable-new-api-stack",
@@ -3456,7 +3456,7 @@ py_test(

py_test(
name = "examples/algorithms/vpg_custom_algorithm",
size = "medium",
size = "large",
srcs = ["examples/algorithms/vpg_custom_algorithm.py"],
args = [
"--enable-new-api-stack",
@@ -5086,7 +5086,6 @@ py_test(
args = [
"--enable-new-api-stack",
"--as-test",
"--framework=torch",
],
# Include the offline data files.
data = ["tests/data/cartpole/cartpole-v1_large"],
52 changes: 36 additions & 16 deletions rllib/algorithms/algorithm_config.py
@@ -324,7 +324,7 @@ def __init__(self, algo_class: Optional[type] = None):
self.num_env_runners = 0
self.create_local_env_runner = True
self.num_envs_per_env_runner = 1
# TODO (sven): Once new ormsgpack system in place, reaplce the string
# TODO (sven): Once new ormsgpack system in place, replace the string
# with proper `gym.envs.registration.VectorizeMode.SYNC`.
self.gym_env_vectorize_mode = "SYNC"
self.num_cpus_per_env_runner = 1
@@ -1056,9 +1056,11 @@ def build_env_to_module_connector(

if env is not None:
obs_space = getattr(env, "single_observation_space", env.observation_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
obs_space = spaces[INPUT_ENV_SINGLE_SPACES][0]
else:
assert self.observation_space is not None
obs_space = self.observation_space
if obs_space is None and self.is_multi_agent:
obs_space = gym.spaces.Dict(
{
@@ -1068,9 +1070,11 @@ def build_env_to_module_connector(
)
if env is not None:
act_space = getattr(env, "single_action_space", env.action_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
act_space = spaces[INPUT_ENV_SINGLE_SPACES][1]
else:
assert self.action_space is not None
act_space = self.action_space
if act_space is None and self.is_multi_agent:
act_space = gym.spaces.Dict(
{
@@ -1160,9 +1164,11 @@ def build_module_to_env_connector(self, env=None, spaces=None) -> ConnectorV2:

if env is not None:
obs_space = getattr(env, "single_observation_space", env.observation_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
obs_space = spaces[INPUT_ENV_SINGLE_SPACES][0]
else:
assert self.observation_space is not None
obs_space = self.observation_space
if obs_space is None and self.is_multi_agent:
obs_space = gym.spaces.Dict(
{
@@ -1172,9 +1178,11 @@ def build_module_to_env_connector(self, env=None, spaces=None) -> ConnectorV2:
)
if env is not None:
act_space = getattr(env, "single_action_space", env.action_space)
else:
assert spaces is not None
elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
act_space = spaces[INPUT_ENV_SINGLE_SPACES][1]
else:
assert self.action_space is not None
act_space = self.action_space
if act_space is None and self.is_multi_agent:
act_space = gym.spaces.Dict(
{
@@ -4402,13 +4410,25 @@ def get_rl_module_spec(
)
rl_module_spec = rl_module_spec[DEFAULT_MODULE_ID]

if spaces is not None:
rl_module_spec.observation_space = spaces[DEFAULT_MODULE_ID][0]
rl_module_spec.action_space = spaces[DEFAULT_MODULE_ID][1]
elif env is not None:
if isinstance(env, gym.vector.VectorEnv):
rl_module_spec.observation_space = env.single_observation_space
rl_module_spec.action_space = env.single_action_space
if rl_module_spec.observation_space is None:
Collaborator review comment: Dumb question: Can't we reuse the self.obs_space, self.action_space here?

if spaces is not None:
rl_module_spec.observation_space = spaces[DEFAULT_MODULE_ID][0]
elif env is not None and isinstance(env, gym.Env):
rl_module_spec.observation_space = getattr(
env, "single_observation_space", env.observation_space
)
else:
rl_module_spec.observation_space = self.observation_space

if rl_module_spec.action_space is None:
if spaces is not None:
rl_module_spec.action_space = spaces[DEFAULT_MODULE_ID][1]
elif env is not None and isinstance(env, gym.Env):
rl_module_spec.action_space = getattr(
env, "single_action_space", env.action_space
)
else:
rl_module_spec.action_space = self.action_space

# If module_config_dict is not defined, set to our generic one.
if rl_module_spec.model_config is None:
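The recurring pattern across the three hunks above (build_env_to_module_connector, build_module_to_env_connector, get_rl_module_spec) is a three-way space-resolution fallback: prefer the env's spaces, then the `spaces` dict, then the spaces configured on the AlgorithmConfig itself; the last step is what supports external envs, where no env object lives inside RLlib. A standalone sketch of that order follows; `resolve_space` is a hypothetical helper, and the string value of INPUT_ENV_SINGLE_SPACES is a placeholder, not the real constant.

import gymnasium as gym

# Placeholder for the real constant imported inside algorithm_config.py.
INPUT_ENV_SINGLE_SPACES = "__env_single_spaces__"


def resolve_space(env, spaces, config_space, index):
    # index=0 -> observation space, index=1 -> action space.
    if env is not None:
        # Vector envs expose single_observation_space/single_action_space;
        # fall back to the plain attribute otherwise.
        attr = ("observation_space", "action_space")[index]
        return getattr(env, "single_" + attr, getattr(env, attr))
    elif spaces is not None and INPUT_ENV_SINGLE_SPACES in spaces:
        return spaces[INPUT_ENV_SINGLE_SPACES][index]
    else:
        # New in this PR: fall back to the config's own space.
        assert config_space is not None
        return config_space


# With no env and no `spaces` dict, the configured space wins:
box = gym.spaces.Box(-1.0, 1.0, (4,))
print(resolve_space(env=None, spaces=None, config_space=box, index=0))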
14 changes: 14 additions & 0 deletions rllib/env/external/__init__.py
@@ -0,0 +1,14 @@
from ray.rllib.env.external.rllink import (
get_rllink_message,
send_rllink_message,
RLlink,
)
from ray.rllib.env.external.rllib_gateway import RLlibGateway


__all__ = [
"get_rllink_message",
"RLlibGateway",
"RLlink",
"send_rllink_message",
]
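The new package's public surface is exactly the four names in __all__ above, so client code can import everything RLlink-related from one place:

# Everything re-exported by rllib/env/external/__init__.py:
from ray.rllib.env.external import (
    RLlibGateway,
    RLlink,
    get_rllink_message,
    send_rllink_message,
)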