_bibliography/papers.bib
+15 −1 lines changed: 15 additions & 1 deletion
@@ -6,6 +6,20 @@ @string{iros
@string{ral = {IEEE Robotics and Automation Letters}}
@string{tro = {IEEE Transactions on Robotics}}
@string{ijrr = {The International Journal of Robotics Research}}
+@misc{yan2025vizcoast,
+title = {Using {VLM} Reasoning to Constrain Task and Motion Planning},
+author = {Muyang Yan and Miras Mengdibayev and Ardon Floros and Weihang Guo and Lydia E. Kavraki and Zachary Kingston},
+abstract = {In task and motion planning, high-level task planning is done over an abstraction of the world to enable efficient search in long-horizon robotics problems. However, the feasibility of these task-level plans relies on the downward refinability of the abstraction into continuous motion. When a domain's refinability is poor, task-level plans that appear valid may ultimately fail during motion planning, requiring replanning and resulting in slower overall performance. Prior works mitigate this by encoding refinement issues as constraints to prune infeasible task plans. However, these approaches only add constraints upon refinement failure, expending significant search effort on infeasible branches. We propose VIZ-COAST, a method of leveraging the common-sense spatial reasoning of large pretrained Vision-Language Models to identify issues with downward refinement a priori, bypassing the need to fix these failures during planning. Experiments on two challenging TAMP domains show that our approach is able to extract plausible constraints from images and domain descriptions, drastically reducing planning times and, in some cases, eliminating downward refinement failures altogether, generalizing to a diverse range of instances from the broader domain.},
+year = 2025,
+eprint = {2510.25548},
+archivePrefix = {arXiv},
+primaryClass = {cs.RO},
+pdf = {https://arxiv.org/abs/2510.25548},
+projects = {long-horizion,implicit},
+note = {Under Review},
+abbr = {ARXIV},
+preview = {viz_coast.png}
+}
@misc{sabbadini2025replan,
title = {Revisiting Replanning from Scratch: Real-Time Incremental Planning with Fast Almost-Surely Asymptotically Optimal Planners},
author = {Mitchell E. C. Sabbadini and Andrew H. Liu and Joseph Ruan and Tyler S. Wilson and Zachary Kingston and Jonathan D. Gammell},
@@ -24,7 +38,7 @@ @misc{sabbadini2025replan
preview = {from_scratch.png}
}
@misc{mao2025cde,
-title = {CDE: Concept-Driven Exploration for Reinforcement Learning},
+title = {{CDE}: Concept-Driven Exploration for Reinforcement Learning},
author = {Le Mao and Andrew H. Liu and Renos Zabounidis and Zachary Kingston and Joseph Campbell},
abstract = {Intelligent exploration remains a critical challenge in reinforcement learning (RL), especially in visual control tasks. Unlike low-dimensional state-based RL, visual RL must extract task-relevant structure from raw pixels, making exploration inefficient. We propose Concept-Driven Exploration (CDE), which leverages a pre-trained vision-language model (VLM) to generate object-centric visual concepts from textual task descriptions as weak, potentially noisy supervisory signals. Rather than directly conditioning on these noisy signals, CDE trains a policy to reconstruct the concepts via an auxiliary objective, using reconstruction accuracy as an intrinsic reward to guide exploration toward task-relevant objects. Because the policy internalizes these concepts, VLM queries are only needed during training, reducing dependence on external models during deployment. Across five challenging simulated visual manipulation tasks, CDE achieves efficient, targeted exploration and remains robust to noisy VLM predictions. Finally, we demonstrate real-world transfer by deploying CDE on a Franka Research 3 arm, attaining an 80\% success rate in a real-world manipulation task.},