-
Notifications
You must be signed in to change notification settings - Fork 373
/
Copy pathmain.py
43 lines (35 loc) · 1.14 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gym_cutting_stock
import gymnasium as gym
from policy import GreedyPolicy, RandomPolicy
# Number of finished episodes each policy is run for.
NUM_EPISODES = 100

# Keyword options forwarded to gym.make when building the environment.
_ENV_OPTIONS = {
    "render_mode": "human",  # Remove this entry to disable rendering
}

# Create the environment
env = gym.make("gym_cutting_stock/CuttingStock-v0", **_ENV_OPTIONS)
def run_policy(environment, policy, num_episodes, initial_seed=42):
    """Roll out ``policy`` on ``environment`` until ``num_episodes`` finish.

    The environment is reset with ``initial_seed`` before the first step.
    Each time an episode ends (``terminated`` or ``truncated``), the final
    ``info`` is printed and the environment is reset with the number of
    episodes completed so far as the seed (0, 1, 2, ...).

    Args:
        environment: Object exposing ``reset(seed=...)`` returning
            ``(observation, info)`` and ``step(action)`` returning
            ``(observation, reward, terminated, truncated, info)``.
        policy: Object exposing ``get_action(observation, info)``.
        num_episodes: Number of finished episodes to run before returning.
        initial_seed: Seed passed to the first ``reset`` call.
    """
    observation, info = environment.reset(seed=initial_seed)

    finished = 0
    while finished < num_episodes:
        action = policy.get_action(observation, info)
        observation, _, terminated, truncated, info = environment.step(action)

        if terminated or truncated:
            print(info)
            # Seed the next episode with the index of the one just finished
            # so every policy is evaluated on the same sequence of seeds.
            observation, info = environment.reset(seed=finished)
            finished += 1


if __name__ == "__main__":
    try:
        # Test GreedyPolicy
        run_policy(env, GreedyPolicy(), NUM_EPISODES)

        # Test RandomPolicy
        run_policy(env, RandomPolicy(), NUM_EPISODES)
    finally:
        # Release the environment even if a policy or a step raises.
        env.close()