[Feature,Example] Add MCTS algorithm and example #2796
Dr.CI classification results
{"FAILED":[{"workflowId":15125624407,"workflowUniqueId":55958309,"id":42517001791,"runnerName":"i-0f3b94e7efff297ea","authorEmail":"kurtamohler@gmail.com","name":"Lint / python-source-and-configs / linux-job","jobName":"python-source-and-configs / linux-job","conclusion":"failure","completed_at":"2025-05-20T00:04:06.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624407/job/42517001791","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["RuntimeError: Command docker exec -t abef9d741dccc850eacec56b5d5655ee3ec6f2eff6178d9e6994655c287fae47 /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t abef9d741dccc850eacec56b5d5655ee3ec6f2eff6178d9e6994655c287fae47 /exec failed with exit code 1"],"failure_context":[],"time":"2025-05-19T23:57:13.000000000Z"},{"workflowId":15125624388,"workflowUniqueId":61790681,"id":42517001746,"runnerName":"i-0d400c8c85d3f5270","authorEmail":"kurtamohler@gmail.com","name":"Continuous Benchmark (PR) / GPU Pytest benchmark","jobName":"GPU Pytest benchmark","conclusion":"failure","completed_at":"2025-05-20T00:31:08.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624388/job/42517001746","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["Process completed with exit code 1."],"failure_lines":["##[error]Process completed with exit code 1."],"failure_context":[],"time":"2025-05-19T23:57:13.000000000Z"},{"workflowId":15125624388,"workflowUniqueId":61790681,"id":42517001753,"runnerName":"","authorEmail":"kurtamohler@gmail.com","name":"Continuous Benchmark (PR) / CPU Pytest benchmark","jobName":"CPU Pytest benchmark","conclusion":"cancelled","completed_at":"2025-05-20T23:57:14.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624388/job/42517001753","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2025-05-19T23:57:13.000000000Z"},{"workflowId":15125624406,"workflowUniqueId":60501564,"id":42517001737,"runnerName":"i-0434932b15ca81d80","authorEmail":"kurtamohler@gmail.com","name":"Habitat Tests on Linux / tests (3.9, 12.8) / linux-job","jobName":"tests (3.9, 12.8) / linux-job","conclusion":"failure","completed_at":"2025-05-20T00:08:06.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624406/job/42517001737","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["RuntimeError: Command docker exec -t cbfc7f843b26254b8c7bd0daa754bfea8a4bd35e469fa03055f297069a823edc /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t cbfc7f843b26254b8c7bd0daa754bfea8a4bd35e469fa03055f297069a823edc /exec failed with exit code 1"],"failure_context":[],"time":"2025-05-19T23:57:13.000000000Z"},{"workflowId":15125624420,"workflowUniqueId":82165822,"id":42517001924,"runnerName":"i-075e1a281d9bf2c7b","authorEmail":"kurtamohler@gmail.com","name":"Unit-tests on Linux / tests-cpu (3.12) / linux-job","jobName":"tests-cpu (3.12) / linux-job","conclusion":"failure","completed_at":"2025-05-20T00:54:59.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624420/job/42517001924","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["test/test_env.py::TestNonTensorEnv::test_parallel[False-False]"],"failure_lines":["FAILED test/test_env.py::TestNonTensorEnv::test_parallel[False-False] - Failed: Timeout (>120.0s) from pytest-timeout."],"failure_context":[],"time":"2025-05-19T23:57:13.000000000Z"}],"FLAKY":[],"BROKEN_TRUNK":[{"workflowId":15125624418,"workflowUniqueId":82165821,"id":42517002237,"runnerName":"i-097fc3f7735ea9603","authorEmail":"kurtamohler@gmail.com","name":"Libs Tests on Linux / unittests-gym (3.9, 12.8) / linux-job","jobName":"unittests-gym (3.9, 12.8) / linux-job","conclusion":"failure","completed_at":"2025-05-20T00:13:25.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624418/job/42517002237","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["test/test_libs.py::TestGym::test_gym_fake_td[True-False-3-HalfCheetah-v2]"],"failure_lines":["FAILED test/test_libs.py::TestGym::test_gym_fake_td[True-False-3-HalfCheetah-v2] - RuntimeError: Failed to initialize OpenGL"],"failure_context":[],"time":"2025-05-19T23:57:14.000000000Z"},{"workflowId":15125624435,"workflowUniqueId":149027642,"id":42517001820,"runnerName":"i-04bfb983893c7f83c","authorEmail":"kurtamohler@gmail.com","name":"LLM Tests on Linux / unittests (3.9, 12.8) / linux-job","jobName":"unittests (3.9, 12.8) / linux-job","conclusion":"cancelled","completed_at":"2025-05-20T01:58:01.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624435/job/42517001820","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["##[error]The operation was canceled."],"failure_lines":["##[error]The operation was canceled."],"failure_context":[],"time":"2025-05-19T23:57:13.000000000Z"},{"workflowId":15125624420,"workflowUniqueId":82165822,"id":42517001944,"runnerName":"i-0a77ecd71c91f8eb2","authorEmail":"kurtamohler@gmail.com","name":"Unit-tests on Linux / tests-gpu (3.11, 12.8) / linux-job","jobName":"tests-gpu (3.11, 12.8) / linux-job","conclusion":"failure","completed_at":"2025-05-20T01:07:37.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624420/job/42517001944","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["test/test_cost.py::TestPPO::test_ppo_value_clipping[False-clip_value4-KLPENPPOLoss-device0]"],"failure_lines":["FAILED test/test_cost.py::TestPPO::test_ppo_value_clipping[False-clip_value4-KLPENPPOLoss-device0] - RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument min in method wrapper_CUDA_clamp_Tensor)"],"failure_context":[],"time":"2025-05-19T23:57:14.000000000Z"},{"workflowId":15125624420,"workflowUniqueId":82165822,"id":42517001992,"runnerName":"i-00bbf1d58a5442e54","authorEmail":"kurtamohler@gmail.com","name":"Unit-tests on Linux / tests-olddeps (3.9, 11.6) / linux-job","jobName":"tests-olddeps (3.9, 11.6) / linux-job","conclusion":"failure","completed_at":"2025-05-20T01:10:38.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624420/job/42517001992","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["test/test_transforms.py::TestActionDiscretizer::test_transform_env[cheetah-SamplingStrategy.RANDOM-False-True]"],"failure_lines":["FAILED test/test_transforms.py::TestActionDiscretizer::test_transform_env[cheetah-SamplingStrategy.RANDOM-False-True] - distutils.compilers.C.errors.CompileError: command '/usr/bin/gcc' failed with exit code 1"],"failure_context":[],"time":"2025-05-19T23:57:14.000000000Z"},{"workflowId":15125624420,"workflowUniqueId":82165822,"id":42517001965,"runnerName":"i-02b08b7e6283953c5","authorEmail":"kurtamohler@gmail.com","name":"Unit-tests on Linux / tests-stable-gpu (3.10, 11.8) / linux-job","jobName":"tests-stable-gpu (3.10, 11.8) / linux-job","conclusion":"failure","completed_at":"2025-05-20T01:10:01.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624420/job/42517001965","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["test/test_cost.py::TestPPO::test_ppo_value_clipping[False-clip_value4-KLPENPPOLoss-device0]"],"failure_lines":["FAILED test/test_cost.py::TestPPO::test_ppo_value_clipping[False-clip_value4-KLPENPPOLoss-device0] - RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument min in method wrapper_CUDA_clamp_Tensor)"],"failure_context":[],"time":"2025-05-19T23:57:14.000000000Z"},{"workflowId":15125624416,"workflowUniqueId":79519303,"id":42517001728,"runnerName":"i-02e1fc4cc88bd1a11","authorEmail":"kurtamohler@gmail.com","name":"Unit-tests on Windows / unittests-cpu (3.10, windows.4xlarge, cpu) / windows-job","jobName":"unittests-cpu (3.10, windows.4xlarge, cpu) / windows-job","conclusion":"failure","completed_at":"2025-05-20T00:30:08.000000000Z","html_url":"https://github.com/pytorch/rl/actions/runs/15125624416/job/42517001728","head_branch":"gh/kurtamohler/5/head","pr_number":2796,"head_sha":"b4f0a0e476c35a696090d66e673bfbfe05449a47","head_sha_timestamp":"2025-05-19T23:57:02.000000000Z","failure_captures":["test/test_transforms.py::TestTimer::test_transform_env"],"failure_lines":["FAILED test/test_transforms.py::TestTimer::test_transform_env - assert tensor(0) == 2"],"failure_context":[],"time":"2025-05-19T23:57:13.000000000Z"}],"UNSTABLE":[]}