Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify registration to only the v5 environments #561

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
124 changes: 62 additions & 62 deletions docs/_scripts/environment-docs.json

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions docs/_scripts/gen_environment_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import itertools

import ale_py
import gymnasium
import tabulate
from ale_py.registration import _rom_id_to_name
from tqdm import tqdm

gymnasium.register_envs(ale_py)

# ROMs that are excluded from the documentation (multi-player / unplayable solo).
impossible_roms = {"maze_craze", "joust", "warlords", "combat"}

# Collect the ROM id of every ALE-backed environment in the Gymnasium registry,
# skipping the excluded ROMs above.
ALL_ATARI_GAMES = set()
for spec in gymnasium.registry.values():
    entry = spec.entry_point
    if isinstance(entry, str) and "ale_py" in entry:
        game = spec.kwargs["game"]
        if game not in impossible_roms:
            ALL_ATARI_GAMES.add(game)

# Generate the list of all atari games on atari.md
print("\n".join(f"atari/{rom}" for rom in sorted(ALL_ATARI_GAMES)))


def generate_value_ranges(values):
    """Yield inclusive ``(low, high)`` pairs for each run of consecutive ints.

    ``values`` is expected to be sorted in ascending order; every maximal run
    of consecutive integers is collapsed into a single inclusive range.
    """
    # Consecutive values share the same (value - index) difference, so
    # grouping by that key splits the sequence into runs.
    for _, group in itertools.groupby(
        enumerate(values), lambda pair: pair[1] - pair[0]
    ):
        run = list(group)
        yield run[0][1], run[-1][1]


def shortened_repr(values):
    """Render a sorted list of ints, eliding long consecutive runs.

    Runs spanning five or more values become ``"low, ..., high"``; shorter
    runs are written out in full.  Returns a bracketed string.
    """
    parts = []
    for low, high in generate_value_ranges(values):
        if high - low >= 5:
            parts.append(f"{low}, ..., {high}")
        else:
            parts.append(", ".join(str(v) for v in range(low, high + 1)))
    return f"[{', '.join(parts)}]"


# Generate the per-game modes/difficulties table on atari.md
headers = [
    "Environment",
    "Possible Modes",
    "Default Mode",
    "Possible Difficulties",
    "Default Difficulty",
]
rows = []

for rom_id in tqdm(ALL_ATARI_GAMES):
    env_name = _rom_id_to_name(rom_id)
    env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped

    difficulties = env.ale.getAvailableDifficulties()
    modes = env.ale.getAvailableModes()
    default_difficulty = env.ale.cloneState().getDifficulty()
    default_mode = env.ale.cloneState().getCurrentMode()

    # VideoCube has thousands of modes; its summary string is hard-coded.
    if env_name == "VideoCube":
        modes_repr = "[0, 1, 2, 100, 101, 102, ..., 5000, 5001, 5002]"
    else:
        modes_repr = shortened_repr(modes)

    rows.append(
        [
            env_name,
            modes_repr,
            default_mode,
            shortened_repr(difficulties),
            default_difficulty,
        ]
    )
    env.close()

print(tabulate.tabulate(rows, headers=headers, tablefmt="github"))
112 changes: 36 additions & 76 deletions docs/_scripts/gen_environments_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import ale_py
import gymnasium
import tabulate
from ale_py.registration import _rom_id_to_name
from ale_py.registration import rom_id_to_name
from tqdm import tqdm

gymnasium.register_envs(ale_py)
Expand All @@ -18,10 +18,6 @@
and env_spec.kwargs["game"] not in impossible_roms
}

# Generate the list of all atari games on atari.md
for rom_id in sorted(ALL_ATARI_GAMES):
print(f"atari/{rom_id}")


def generate_value_ranges(values):
for a, b in itertools.groupby(enumerate(values), lambda pair: pair[1] - pair[0]):
Expand All @@ -39,83 +35,51 @@ def shortened_repr(values):
return "[" + ", ".join(output) + "]"


# # Test examples
# print(shortened_repr([0]))
# print(shortened_repr([1, 2, 3]))
# print(shortened_repr([0, 1, 2, 3]))
# print(shortened_repr([0, 4, 8, 12, 16, 20, 24, 28]))
# print(shortened_repr(list(range(32)) + [128]))


# # Generate difficult levels table on atari.md
headers = [
"Environment",
"Possible Modes",
"Default Mode",
"Possible Difficulties",
"Default Difficulty",
]
rows = []
# Generate each page's results
with open("environment-docs.json") as file:
atari_data = json.load(file)

for rom_id in tqdm(ALL_ATARI_GAMES):
env_name = _rom_id_to_name(rom_id)
env_name = rom_id_to_name(rom_id)

env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped

available_difficulties = env.ale.getAvailableDifficulties()
default_difficulty = env.ale.cloneState().getDifficulty()
available_modes = env.ale.getAvailableModes()
default_mode = env.ale.cloneState().getCurrentMode()

if env_name == "VideoCube":
available_modes = "[0, 1, 2, 100, 101, 102, ..., 5000, 5001, 5002]"
else:
available_modes = shortened_repr(available_modes)

rows.append(
[
env_name,
available_modes,
default_mode,
shortened_repr(available_difficulties),
default_difficulty,
]
general_info_table = tabulate.tabulate([
["Make", f'gymnasium.make("ALE/{env_name}-v5")'],
["Action Space", str(env.action_space)],
["Observation Space", str(env.observation_space)],
],
headers=["", ""],
tablefmt="github",
)
env.close()

print(tabulate.tabulate(rows, headers=headers, tablefmt="github"))

# Generate each pages results
with open("atari-docs.json") as file:
atari_data = json.load(file)

for rom_id in tqdm(ALL_ATARI_GAMES):
env_name = _rom_id_to_name(rom_id)

env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped
if rom_id in atari_data:
env_data = atari_data[rom_id]

env_description = env_data["env_description"]
if env_data["atariage_url"]:
env_url = f"""
env_description = env_data["env_description"]

if env_data["atariage_url"]:
env_url = f"""
For a more detailed documentation, see [the AtariAge page]({env_data['atariage_url']})
"""
else:
env_url = ""
reward_description = env_data["reward_description"]
else:
# Add the information to `atari_docs.json` and rerun this file to generate the new documentation
env_description = f"{env_name} is missing description documentation. If you are interested in writing up a description, please create an issue or PR with the information on the Gymnasium github."
env_url = ""

if env_data["reward_description"]:
reward_description = f"""
### Reward

{env_data["reward_description"]}
"""
else:
reward_description = ""

table_values = map(
action_table_values = map(
lambda s: f"`{s}`",
itertools.chain(*zip(range(env.action_space.n), env.get_action_meanings())),
)
default_action_table = tabulate.tabulate(
list(itertools.zip_longest(*([iter(table_values)] * 6), fillvalue="")),
list(itertools.zip_longest(*([iter(action_table_values)] * 6), fillvalue="")),
headers=["Value", "Meaning", "Value", "Meaning", "Value", "Meaning"],
tablefmt="github",
)
Expand Down Expand Up @@ -148,7 +112,7 @@ def shortened_repr(values):
env_spec.id,
f'`"{env_spec.kwargs["obs_type"]}"`',
f'`{env_spec.kwargs["frameskip"]}`',
f'`{env_spec.kwargs["repeat_action_probability"]}`',
f'`{env_spec.kwargs["repeat_action_probability"]:.2f}`',
]
for env_spec in env_specs
]
Expand Down Expand Up @@ -184,18 +148,14 @@ def shortened_repr(values):

# {env_name}

```{{figure}} ../../_static/videos/atari/{rom_id}.gif
```{{figure}} ../_static/videos/environments/{rom_id}.gif
:width: 120px
:name: {env_name}
```

This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.

| | |
|---|---|
| Action Space | {env.action_space} |
| Observation Space | {env.observation_space} |
| Import | `gymnasium.make("{env.spec.id}")` |
{general_info_table}

For more {env_name} variants with different observation and action spaces, see the variants section.

Expand All @@ -213,23 +173,23 @@ def shortened_repr(values):

## Observations

Atari environments have three possible observation types: `"rgb"`, `"grayscale"` and `"ram"`.
Atari environments have three possible observation types:

- `obs_type="rgb" -> observation_space=Box(0, 255, (210, 160, 3), np.uint8)`
- `obs_type="ram" -> observation_space=Box(0, 255, (128,), np.uint8)`
- `obs_type="grayscale" -> Box(0, 255, (210, 160), np.uint8)`, a grayscale version of the "rgb" type
- `obs_type="rgb"` -> `observation_space=Box(0, 255, (210, 160, 3), np.uint8)`
- `obs_type="ram"` -> `observation_space=Box(0, 255, (128,), np.uint8)`
- `obs_type="grayscale"` -> `Box(0, 255, (210, 160), np.uint8)`, a grayscale version of the `"rgb"` type

See variants section for the type of observation used by each environment id by default.

{reward_description}

## Variants

{env_name} has the following variants of the environment id which have the following differences in observation,
the number of frame-skips and the repeat action probability.

{env_variant_table}

See the [version history page](https://ale.farama.org/environments/#version-history-and-naming-schemes) to implement previously implemented environments, e.g., `{env_name}NoFrameskip-v4`.

## Difficulty and modes

It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
Expand All @@ -246,5 +206,5 @@ def shortened_repr(values):
* v4: Stickiness of actions was removed
* v0: Initial versions release
"""
with open(f"../environments/atari/{rom_id}.md", "w") as file:
with open(f"../environments/{rom_id}.md", "w") as file:
file.write(TEMPLATE)
65 changes: 28 additions & 37 deletions docs/environments.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,21 +142,19 @@ The Atari environments observation can be

## Rewards

The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
find these manuals on [AtariAge](https://atariage.com/).
The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/).

## Stochasticity

As the Atari games are entirely deterministic, agents could achieve
state-of-the-art performance by simply memorizing an optimal sequence of actions while completely ignoring observations from the environment.
As the Atari games are entirely deterministic, agents can achieve state-of-the-art performance by simply memorizing an optimal sequence of actions while completely ignoring observations from the environment.

There are several methods to avoid this:

1. Sticky actions: Instead of always simulating the action passed to the environment, there is a small
probability that the previously executed action is used instead. In the v0 and v5 environments, the probability of
repeating an action is `25%` while in v4 environments, the probability is `0%`. Users can specify the repeat action
probability using `repeat_action_probability` to `make`.
2. Frameskipping: On each environment step, the action can be repeated for a random number of frames. This behavior
2. Frame-skipping: On each environment step, the action can be repeated for a random number of frames. This behavior
may be altered by setting the keyword argument `frameskip` to either a positive integer or
a tuple of two positive integers. If `frameskip` is an integer, frame skipping is deterministic, and in each step the action is
repeated `frameskip` many times. Otherwise, if `frameskip` is a tuple, the number of skipped frames is chosen uniformly at
Expand Down Expand Up @@ -190,38 +188,31 @@ action space will be reduced to a subset.

## Version History and Naming Schemes

All Atari games are available in three versions. They differ in the default settings of the arguments above.
The differences are listed in the following table:

| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|---------|--------------|------------------------------|----------------------|
| v0 | `(2, 5,)` | `0.25` | `False` |
| v4 | `(2, 5,)` | `0.0` | `False` |
| v5 | `4` | `0.25` | `False` |

> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
customize the environment using the arguments above, if necessary.

For each Atari game, several different configurations are registered in Gymnasium. The naming schemes are analogous for
v0 and v4. Let us take a look at all variations of Amidar-v0 that are registered with gymnasium:

| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` |
|----------------------------|-------------|--------------|------------------------------|
| Amidar-v0 | `"rgb"` | `(2, 5,)` | `0.25` |
| AmidarDeterministic-v0 | `"rgb"` | `4` | `0.0` |
| AmidarNoframeskip-v0 | `"rgb"` | `1` | `0.25` |
| Amidar-ram-v0 | `"ram"` | `(2, 5,)` | `0.25` |
| Amidar-ramDeterministic-v0 | `"ram"` | `4` | `0.0` |
| Amidar-ramNoframeskip-v0 | `"ram"` | `1` | `0.25` |

Things change in v5: The suffixes "Deterministic" and "NoFrameskip" are no longer available. Instead, you must specify the
environment configuration via arguments passed to `gymnasium.make`. Moreover, the v5 environments
are in the "ALE" namespace. The suffix "-ram" is still available. Thus, we get the following table:

| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` |
|-------------------|-------------|--------------|------------------------------|
| ALE/Amidar-v5 | `"rgb"` | `4` | `0.25` |
| ALE/Amidar-ram-v5 | `"ram"` | `4` | `0.25` |
In v0.11, the number of registered Atari environments was significantly reduced from 960 to 105, registering only `ALE/{rom_name}-v5`, following the best practices outlined in [[2]](#2).

| Name             | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|------------------|-------------|--------------|------------------------------|---------------------|
| ALE/Adventure-v5 | `"rgb"` | `4` | `0.25` | `False` |

Importantly, `repeat_action_probability=0.25` can negatively impact the performance of agents so when comparing training graphs, be aware of the parameters used for fair comparisons.

To create a previously implemented environment, use the following parameters: `gymnasium.make(env_id, obs_type=..., frameskip=..., repeat_action_probability=..., full_action_space=...)`.

| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|-------------------------------|-------------|--------------|------------------------------|----------------------|
| Adventure-v0 | `"rgb"` | `(2, 5,)` | `0.25` | `False` |
| AdventureDeterministic-v0 | `"rgb"` | `4` | `0.25` | `False` |
| AdventureNoframeskip-v0 | `"rgb"` | `1` | `0.25` | `False` |
| Adventure-ram-v0 | `"ram"` | `(2, 5,)` | `0.25` | `False` |
| Adventure-ramDeterministic-v0 | `"ram"` | `4` | `0.25` | `False` |
| Adventure-ramNoframeskip-v0 | `"ram"` | `1` | `0.25` | `False` |
| Adventure-v4 | `"rgb"` | `(2, 5,)` | `0.0` | `False` |
| AdventureDeterministic-v4 | `"rgb"` | `4` | `0.0` | `False` |
| AdventureNoframeskip-v4 | `"rgb"` | `1` | `0.0` | `False` |
| Adventure-ram-v4 | `"ram"` | `(2, 5,)` | `0.0` | `False` |
| Adventure-ramDeterministic-v4 | `"ram"` | `4` | `0.0` | `False` |
| Adventure-ramNoframeskip-v4 | `"ram"` | `1` | `0.0` | `False` |
| ALE/Adventure-ram-v5 | `"ram"` | `4` | `0.25` | `False` |

## Flavors

Expand Down
Loading