% dumbib_dat.tex
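% Reference data for the dumbib bibliography setup, one \dumbibReferenceEntry
% per work. Entry format (inferred from the entries below, not a documented
% specification):
%   {key}    citation key used in the document source
%   {label}  short author label, presumably for inline citations
%   {year}   publication year, with a/b suffixes disambiguating same-author,
%            same-year works (e.g. klochkov_zhivotovskiy2021a/b)
%   {text}   full reference text as typeset in the reference list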
\dumbibReferenceEntry{abbasi-yadkori_etal2011}{Abbasi{-}Yadkori et al.}{2011}%
{Abbasi{-}Yadkori Y., P{\'{a}}l D., Szepesv{\'{a}}ri C. (2011). Improved Algorithms for Linear Stochastic Bandits. \textit{Advances in Neural Information Processing Systems (NeurIPS)}.}
\dumbibReferenceEntry{abel_etal2023}{Abel et al.}{2023}%
{Abel D., Barreto A., Van Roy B., Precup D., van Hasselt H.P., Singh S. (2023). A Definition of Continual Reinforcement Learning. \textit{Advances in Neural Information Processing Systems (NeurIPS)}.}
\dumbibReferenceEntry{durrett2019}{Durrett}{2019}%
{Durrett R. (2019). \textit{Probability: theory and examples}. Cambridge University Press.}
\dumbibReferenceEntry{klochkov_zhivotovskiy2021a}{Klochkov and Zhivotovskiy}{2021a}%
{Klochkov Y., Zhivotovskiy N. (2021a). Stability and Deviation Optimal Risk Bounds with Convergence Rate $O(1/n)$. \textit{Advances in Neural Information Processing Systems (NeurIPS)}.}
\dumbibReferenceEntry{klochkov_zhivotovskiy2021b}{Klochkov and Zhivotovskiy}{2021b}%
{Klochkov Y., Zhivotovskiy N. (2021b). Stability and Deviation Optimal Risk Bounds with Convergence Rate $O(1/n)$. \textit{Advances in Neural Information Processing Systems (NeurIPS)}.}
\dumbibReferenceEntry{mahmood_sutton2013}{Mahmood and Sutton}{2013}%
{Mahmood A.R., Sutton R.S. (2013). Representation search through generate and test. \textit{Workshops at the Twenty-Seventh AAAI Conference on Artificial Intelligence}.}
\dumbibReferenceEntry{osband_etal2020}{Osband et al.}{2020}%
{Osband I., Doron Y., Hessel M., Aslanides J., Sezener E., Saraiva A., McKinney K., Lattimore T., Szepesv{\'{a}}ri C., Singh S., Van Roy B., Sutton R.S., Silver D., van Hasselt H. (2020). Behaviour Suite for Reinforcement Learning. \textit{International Conference on Learning Representations (ICLR)}.}
\dumbibReferenceEntry{pierre_vonderohe2021}{Pierre and von der Ohe}{2021}%
{Pierre \'E., von der Ohe S. (2021). lplpl. \textit{Advances in Neural Information Processing Systems (NeurIPS)}.}
\dumbibReferenceEntry{schulman_etal2017}{Schulman et al.}{2017}%
{Schulman J., Wolski F., Dhariwal P., Radford A., Klimov O. (2017). Proximal Policy Optimization Algorithms. \textit{arXiv: 1707.06347}.}
\dumbibReferenceEntry{sutton_barto1998}{Sutton and Barto}{1998}%
{Sutton R.S., Barto A.G. (1998). \textit{Reinforcement learning: An introduction}. MIT Press.}
\dumbibReferenceEntry{sutton_etal2009}{Sutton et al.}{2009}%
{Sutton R.S., Maei H.R., Precup D., Bhatnagar S., Silver D., Szepesv{\'{a}}ri C., Wiewiora E. (2009). Fast gradient-descent methods for temporal-difference learning with linear function approximation. \textit{International Conference on Machine Learning (ICML)}.}
\dumbibReferenceEntry{tsybakov2004}{Tsybakov}{2004}%
{Tsybakov A.B. (2004). Optimal aggregation of classifiers in statistical learning. \textit{The Annals of Statistics}.}
\dumbibReferenceEntry{vanroy_tsitsiklis1995}{Van Roy and Tsitsiklis}{1995}%
{Van Roy B., Tsitsiklis J.N. (1995). Stable Linear Approximations to Dynamic Programming for Stochastic Control Problems with Local Transitions. \textit{Advances in Neural Information Processing Systems (NeurIPS)}.}
\dumbibReferenceEntry{xiao_etal2024}{Xiao et al.}{2024}%
{Xiao T.Z., Bamler R., Schölkopf B., Liu W. (2024). Verbalized Machine Learning: Revisiting Machine Learning with Language Models. \textit{arXiv: 2406.04344}.}
\dumbibReferenceEntry{zhang2023}{Zhang}{2023}%
{Zhang T. (2023). \textit{Mathematical analysis of machine learning algorithms}. Cambridge University Press.}
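% Minimal usage sketch (kept commented out so this data file stays safe to
% \input). The wrapper document and the command names \dumbibCite and
% \dumbibPrintReferences are hypothetical illustrations, not confirmed by this
% file; the actual commands are defined by the dumbib package itself.
%
%   \documentclass{article}
%   \usepackage{dumbib}          % assumed package name
%   \input{dumbib_dat}           % loads the entries above
%   \begin{document}
%   Regret bounds for linear bandits appear in
%   \dumbibCite{abbasi-yadkori_etal2011}.  % hypothetical citation macro
%   \dumbibPrintReferences                 % hypothetical list-typesetting macro
%   \end{document}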