Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify registration to only the v5 environments #561

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
124 changes: 62 additions & 62 deletions docs/_scripts/environment-docs.json

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions docs/_scripts/gen_environment_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import itertools

import ale_py
import gymnasium
import tabulate
from ale_py.registration import _rom_id_to_name
from tqdm import tqdm

gymnasium.register_envs(ale_py)

# ROMs that are excluded from the documentation (multi-player / unplayable solo).
impossible_roms = {"maze_craze", "joust", "warlords", "combat"}

# Collect the ROM id of every ALE-backed environment in the Gymnasium registry,
# skipping the excluded ROMs above.
ALL_ATARI_GAMES = set()
for spec in gymnasium.registry.values():
    entry = spec.entry_point
    if isinstance(entry, str) and "ale_py" in entry:
        game = spec.kwargs["game"]
        if game not in impossible_roms:
            ALL_ATARI_GAMES.add(game)

# Generate the list of all atari games on atari.md
print("\n".join(f"atari/{rom}" for rom in sorted(ALL_ATARI_GAMES)))


def generate_value_ranges(values):
    """Yield inclusive ``(low, high)`` pairs for each run of consecutive ints.

    ``values`` is expected to be sorted in ascending order; every maximal run
    of consecutive integers is collapsed into a single inclusive range.
    """
    # Consecutive values share the same (value - index) difference, so
    # grouping by that key splits the sequence into runs.
    for _, group in itertools.groupby(
        enumerate(values), lambda pair: pair[1] - pair[0]
    ):
        run = list(group)
        yield run[0][1], run[-1][1]


def shortened_repr(values):
    """Render a sorted list of ints, eliding long consecutive runs.

    Runs spanning five or more values become ``"low, ..., high"``; shorter
    runs are written out in full.  Returns a bracketed string.
    """
    parts = []
    for low, high in generate_value_ranges(values):
        if high - low >= 5:
            parts.append(f"{low}, ..., {high}")
        else:
            parts.append(", ".join(str(v) for v in range(low, high + 1)))
    return f"[{', '.join(parts)}]"


# Generate the per-game modes/difficulties table on atari.md
headers = [
    "Environment",
    "Possible Modes",
    "Default Mode",
    "Possible Difficulties",
    "Default Difficulty",
]
rows = []

for rom_id in tqdm(ALL_ATARI_GAMES):
    env_name = _rom_id_to_name(rom_id)
    env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped

    difficulties = env.ale.getAvailableDifficulties()
    modes = env.ale.getAvailableModes()
    default_difficulty = env.ale.cloneState().getDifficulty()
    default_mode = env.ale.cloneState().getCurrentMode()

    # VideoCube has thousands of modes; its summary string is hard-coded.
    if env_name == "VideoCube":
        modes_repr = "[0, 1, 2, 100, 101, 102, ..., 5000, 5001, 5002]"
    else:
        modes_repr = shortened_repr(modes)

    rows.append(
        [
            env_name,
            modes_repr,
            default_mode,
            shortened_repr(difficulties),
            default_difficulty,
        ]
    )
    env.close()

print(tabulate.tabulate(rows, headers=headers, tablefmt="github"))
112 changes: 36 additions & 76 deletions docs/_scripts/gen_environments_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import ale_py
import gymnasium
import tabulate
from ale_py.registration import _rom_id_to_name
from ale_py.registration import rom_id_to_name
from tqdm import tqdm

gymnasium.register_envs(ale_py)
Expand All @@ -18,10 +18,6 @@
and env_spec.kwargs["game"] not in impossible_roms
}

# Generate the list of all atari games on atari.md
for rom_id in sorted(ALL_ATARI_GAMES):
print(f"atari/{rom_id}")


def generate_value_ranges(values):
for a, b in itertools.groupby(enumerate(values), lambda pair: pair[1] - pair[0]):
Expand All @@ -39,83 +35,51 @@ def shortened_repr(values):
return "[" + ", ".join(output) + "]"


# # Test examples
# print(shortened_repr([0]))
# print(shortened_repr([1, 2, 3]))
# print(shortened_repr([0, 1, 2, 3]))
# print(shortened_repr([0, 4, 8, 12, 16, 20, 24, 28]))
# print(shortened_repr(list(range(32)) + [128]))


# # Generate difficult levels table on atari.md
headers = [
"Environment",
"Possible Modes",
"Default Mode",
"Possible Difficulties",
"Default Difficulty",
]
rows = []
# Generate each page's results
with open("environment-docs.json") as file:
atari_data = json.load(file)

for rom_id in tqdm(ALL_ATARI_GAMES):
env_name = _rom_id_to_name(rom_id)
env_name = rom_id_to_name(rom_id)

env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped

available_difficulties = env.ale.getAvailableDifficulties()
default_difficulty = env.ale.cloneState().getDifficulty()
available_modes = env.ale.getAvailableModes()
default_mode = env.ale.cloneState().getCurrentMode()

if env_name == "VideoCube":
available_modes = "[0, 1, 2, 100, 101, 102, ..., 5000, 5001, 5002]"
else:
available_modes = shortened_repr(available_modes)

rows.append(
[
env_name,
available_modes,
default_mode,
shortened_repr(available_difficulties),
default_difficulty,
]
general_info_table = tabulate.tabulate([
["Make", f'gymnasium.make("ALE/{env_name}-v5")'],
["Action Space", str(env.action_space)],
["Observation Space", str(env.observation_space)],
],
headers=["", ""],
tablefmt="github",
)
env.close()

print(tabulate.tabulate(rows, headers=headers, tablefmt="github"))

# Generate each pages results
with open("atari-docs.json") as file:
atari_data = json.load(file)

for rom_id in tqdm(ALL_ATARI_GAMES):
env_name = _rom_id_to_name(rom_id)

env = gymnasium.make(f"ALE/{env_name}-v5").unwrapped
if rom_id in atari_data:
env_data = atari_data[rom_id]

env_description = env_data["env_description"]
if env_data["atariage_url"]:
env_url = f"""
env_description = env_data["env_description"]

if env_data["atariage_url"]:
env_url = f"""
For a more detailed documentation, see [the AtariAge page]({env_data['atariage_url']})
"""
else:
env_url = ""
reward_description = env_data["reward_description"]
else:
# Add the information to `atari_docs.json` and rerun this file to generate the new documentation
env_description = f"{env_name} is missing description documentation. If you are interested in writing up a description, please create an issue or PR with the information on the Gymnasium github."
env_url = ""

if env_data["reward_description"]:
reward_description = f"""
### Reward

{env_data["reward_description"]}
"""
else:
reward_description = ""

table_values = map(
action_table_values = map(
lambda s: f"`{s}`",
itertools.chain(*zip(range(env.action_space.n), env.get_action_meanings())),
)
default_action_table = tabulate.tabulate(
list(itertools.zip_longest(*([iter(table_values)] * 6), fillvalue="")),
list(itertools.zip_longest(*([iter(action_table_values)] * 6), fillvalue="")),
headers=["Value", "Meaning", "Value", "Meaning", "Value", "Meaning"],
tablefmt="github",
)
Expand Down Expand Up @@ -148,7 +112,7 @@ def shortened_repr(values):
env_spec.id,
f'`"{env_spec.kwargs["obs_type"]}"`',
f'`{env_spec.kwargs["frameskip"]}`',
f'`{env_spec.kwargs["repeat_action_probability"]}`',
f'`{env_spec.kwargs["repeat_action_probability"]:.2f}`',
]
for env_spec in env_specs
]
Expand Down Expand Up @@ -184,18 +148,14 @@ def shortened_repr(values):

# {env_name}

```{{figure}} ../../_static/videos/atari/{rom_id}.gif
```{{figure}} ../_static/videos/environments/{rom_id}.gif
:width: 120px
:name: {env_name}
```

This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.

| | |
|---|---|
| Action Space | {env.action_space} |
| Observation Space | {env.observation_space} |
| Import | `gymnasium.make("{env.spec.id}")` |
{general_info_table}

For more {env_name} variants with different observation and action spaces, see the variants section.

Expand All @@ -213,23 +173,23 @@ def shortened_repr(values):

## Observations

Atari environments have three possible observation types: `"rgb"`, `"grayscale"` and `"ram"`.
Atari environments have three possible observation types:

- `obs_type="rgb" -> observation_space=Box(0, 255, (210, 160, 3), np.uint8)`
- `obs_type="ram" -> observation_space=Box(0, 255, (128,), np.uint8)`
- `obs_type="grayscale" -> Box(0, 255, (210, 160), np.uint8)`, a grayscale version of the "rgb" type
- `obs_type="rgb"` -> `observation_space=Box(0, 255, (210, 160, 3), np.uint8)`
- `obs_type="ram"` -> `observation_space=Box(0, 255, (128,), np.uint8)`
- `obs_type="grayscale"` -> `Box(0, 255, (210, 160), np.uint8)`, a grayscale version of the `"rgb"` type

See variants section for the type of observation used by each environment id by default.

{reward_description}

## Variants

{env_name} has the following variants of the environment id which have the following differences in observation,
the number of frame-skips and the repeat action probability.

{env_variant_table}

See the [version history page](https://ale.farama.org/environments/#version-history-and-naming-schemes) to implement previously implemented environments, e.g., `{env_name}NoFrameskip-v4`.

## Difficulty and modes

It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
Expand All @@ -246,5 +206,5 @@ def shortened_repr(values):
* v4: Stickiness of actions was removed
* v0: Initial versions release
"""
with open(f"../environments/atari/{rom_id}.md", "w") as file:
with open(f"../environments/{rom_id}.md", "w") as file:
file.write(TEMPLATE)
65 changes: 28 additions & 37 deletions docs/environments.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,21 +142,19 @@ The Atari environments observation can be

## Rewards

The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
find these manuals on [AtariAge](https://atariage.com/).
The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/).

## Stochasticity

As the Atari games are entirely deterministic, agents could achieve
state-of-the-art performance by simply memorizing an optimal sequence of actions while completely ignoring observations from the environment.
As the Atari games are entirely deterministic, agents can achieve state-of-the-art performance by simply memorizing an optimal sequence of actions while completely ignoring observations from the environment.

There are several methods to avoid this:

1. Sticky actions: Instead of always simulating the action passed to the environment, there is a small
probability that the previously executed action is used instead. In the v0 and v5 environments, the probability of
repeating an action is `25%` while in v4 environments, the probability is `0%`. Users can specify the repeat action
probability using `repeat_action_probability` to `make`.
2. Frameskipping: On each environment step, the action can be repeated for a random number of frames. This behavior
2. Frame-skipping: On each environment step, the action can be repeated for a random number of frames. This behavior
may be altered by setting the keyword argument `frameskip` to either a positive integer or
a tuple of two positive integers. If `frameskip` is an integer, frame skipping is deterministic, and in each step the action is
repeated `frameskip` many times. Otherwise, if `frameskip` is a tuple, the number of skipped frames is chosen uniformly at
Expand Down Expand Up @@ -190,38 +188,31 @@ action space will be reduced to a subset.

## Version History and Naming Schemes

All Atari games are available in three versions. They differ in the default settings of the arguments above.
The differences are listed in the following table:

| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|---------|--------------|------------------------------|----------------------|
| v0 | `(2, 5,)` | `0.25` | `False` |
| v4 | `(2, 5,)` | `0.0` | `False` |
| v5 | `4` | `0.25` | `False` |

> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
customize the environment using the arguments above, if necessary.

For each Atari game, several different configurations are registered in Gymnasium. The naming schemes are analogous for
v0 and v4. Let us take a look at all variations of Amidar-v0 that are registered with gymnasium:

| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` |
|----------------------------|-------------|--------------|------------------------------|
| Amidar-v0 | `"rgb"` | `(2, 5,)` | `0.25` |
| AmidarDeterministic-v0 | `"rgb"` | `4` | `0.0` |
| AmidarNoframeskip-v0 | `"rgb"` | `1` | `0.25` |
| Amidar-ram-v0 | `"ram"` | `(2, 5,)` | `0.25` |
| Amidar-ramDeterministic-v0 | `"ram"` | `4` | `0.0` |
| Amidar-ramNoframeskip-v0 | `"ram"` | `1` | `0.25` |

Things change in v5: The suffixes "Deterministic" and "NoFrameskip" are no longer available. Instead, you must specify the
environment configuration via arguments passed to `gymnasium.make`. Moreover, the v5 environments
are in the "ALE" namespace. The suffix "-ram" is still available. Thus, we get the following table:

| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` |
|-------------------|-------------|--------------|------------------------------|
| ALE/Amidar-v5 | `"rgb"` | `4` | `0.25` |
| ALE/Amidar-ram-v5 | `"ram"` | `4` | `0.25` |
In v0.11, the number of registered Atari environments was significantly reduced from 960 to 105, registering only `ALE/{rom_name}-v5`, following the best practices outlined in [[2]](#2).

| Name             | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|------------------|-------------|--------------|------------------------------|---------------------|
| ALE/Adventure-v5 | `"rgb"` | `4` | `0.25` | `False` |

Importantly, `repeat_action_probability=0.25` can negatively impact the performance of agents so when comparing training graphs, be aware of the parameters used for fair comparisons.

To create a previously implemented environment, use the following parameters: `gymnasium.make(env_id, obs_type=..., frameskip=..., repeat_action_probability=..., full_action_space=...)`.

| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|-------------------------------|-------------|--------------|------------------------------|----------------------|
| Adventure-v0 | `"rgb"` | `(2, 5,)` | `0.25` | `False` |
| AdventureDeterministic-v0 | `"rgb"` | `4` | `0.25` | `False` |
| AdventureNoframeskip-v0 | `"rgb"` | `1` | `0.25` | `False` |
| Adventure-ram-v0 | `"ram"` | `(2, 5,)` | `0.25` | `False` |
| Adventure-ramDeterministic-v0 | `"ram"` | `4` | `0.25` | `False` |
| Adventure-ramNoframeskip-v0 | `"ram"` | `1` | `0.25` | `False` |
| Adventure-v4 | `"rgb"` | `(2, 5,)` | `0.0` | `False` |
| AdventureDeterministic-v4 | `"rgb"` | `4` | `0.0` | `False` |
| AdventureNoframeskip-v4 | `"rgb"` | `1` | `0.0` | `False` |
| Adventure-ram-v4 | `"ram"` | `(2, 5,)` | `0.0` | `False` |
| Adventure-ramDeterministic-v4 | `"ram"` | `4` | `0.0` | `False` |
| Adventure-ramNoframeskip-v4 | `"ram"` | `1` | `0.0` | `False` |
| ALE/Adventure-ram-v5 | `"ram"` | `4` | `0.25` | `False` |

## Flavors

Expand Down
Loading