Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Richard S. Sutton
@inproceedings{DBLP:conf/aaai/SuttonMHSTT024,
  author     = {Richard S. Sutton and Marlos C. Machado and G. Zacharias Holland and David Szepesvari and Finbarr Timbers and Brian Tanner and Adam White},
  editor     = {Michael J. Wooldridge and Jennifer G. Dy and Sriraam Natarajan},
  title      = {Reward-Respecting Subtasks for Model-Based Reinforcement Learning (Abstract Reprint)},
  booktitle  = {Thirty-Eighth {AAAI} Conference on Artificial Intelligence, {AAAI} 2024, Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2024, Fourteenth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2024, February 20-27, 2024, Vancouver, Canada},
  pages      = {22713},
  publisher  = {{AAAI} Press},
  year       = {2024},
  url        = {https://doi.org/10.1609/aaai.v38i20.30613},
  doi        = {10.1609/AAAI.V38I20.30613},
  timestamp  = {Tue, 02 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/SuttonMHSTT024.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2401-17401,
  author     = {Thomas Degris and Khurram Javed and Arsalan Sharifnassab and Yuxin Liu and Richard S. Sutton},
  title      = {Step-size Optimization for Continual Learning},
  journal    = {CoRR},
  volume     = {abs/2401.17401},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2401.17401},
  doi        = {10.48550/ARXIV.2401.17401},
  eprinttype = {arXiv},
  eprint     = {2401.17401},
  timestamp  = {Wed, 07 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-17401.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The camel-case name in the title is braced so sentence-casing styles do not flatten it.
@article{DBLP:journals/corr/abs-2402-02342,
  author     = {Arsalan Sharifnassab and Saber Salehkaleybar and Richard S. Sutton},
  title      = {{MetaOptimize}: {A} Framework for Optimizing Step Sizes and Other Meta-parameters},
  journal    = {CoRR},
  volume     = {abs/2402.02342},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.02342},
  doi        = {10.48550/ARXIV.2402.02342},
  eprinttype = {arXiv},
  eprint     = {2402.02342},
  timestamp  = {Fri, 09 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-02342.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/adb/RafieeAGKSLW23,
  author     = {Banafsheh Rafiee and Zaheer Abbas and Sina Ghiassian and Raksha Kumaraswamy and Richard S. Sutton and Elliot A. Ludvig and Adam White},
  title      = {From eye-blinks to state construction: Diagnostic benchmarks for online representation learning},
  journal    = {Adapt. Behav.},
  volume     = {31},
  number     = {1},
  pages      = {3--19},
  year       = {2023},
  url        = {https://doi.org/10.1177/10597123221085039},
  doi        = {10.1177/10597123221085039},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/adb/RafieeAGKSLW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SuttonMHSTTW23,
  author     = {Richard S. Sutton and Marlos C. Machado and G. Zacharias Holland and David Szepesvari and Finbarr Timbers and Brian Tanner and Adam White},
  title      = {Reward-respecting subtasks for model-based reinforcement learning},
  journal    = {Artif. Intell.},
  volume     = {324},
  pages      = {104001},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.artint.2023.104001},
  doi        = {10.1016/J.ARTINT.2023.104001},
  timestamp  = {Wed, 01 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ai/SuttonMHSTTW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/JavedSSW23,
  author     = {Khurram Javed and Haseeb Shah and Richard S. Sutton and Martha White},
  title      = {Scalable Real-Time Recurrent Learning Using Columnar-Constructive Networks},
  journal    = {J. Mach. Learn. Res.},
  volume     = {24},
  pages      = {256:1--256:34},
  year       = {2023},
  url        = {http://jmlr.org/papers/v24/23-0367.html},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/JavedSSW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nca/MathewsonPSESP23,
  author     = {Kory W. Mathewson and Adam S. R. Parker and Craig Sherstan and Ann L. Edwards and Richard S. Sutton and Patrick M. Pilarski},
  title      = {Communicative capital: a key resource for human-machine shared agency and collaborative capacity},
  journal    = {Neural Comput. Appl.},
  volume     = {35},
  number     = {23},
  pages      = {16805--16819},
  year       = {2023},
  url        = {https://doi.org/10.1007/s00521-022-07948-1},
  doi        = {10.1007/S00521-022-07948-1},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nca/MathewsonPSESP23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/collas/RafieeG0S0023,
  author     = {Banafsheh Rafiee and Sina Ghiassian and Jun Jin and Richard S. Sutton and Jun Luo and Adam White},
  editor     = {Sarath Chandar and Razvan Pascanu and Hanie Sedghi and Doina Precup},
  title      = {Auxiliary task discovery through generate-and-test},
  booktitle  = {Conference on Lifelong Learning Agents, 22-25 August 2023, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {232},
  pages      = {703--714},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v232/rafiee23a.html},
  timestamp  = {Tue, 20 Feb 2024 13:52:18 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/RafieeG0S0023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The compound surname De Asis is braced so BibTeX does not treat "De" as a given name.
@inproceedings{DBLP:conf/collas/Asis0S23,
  author     = {Kristopher {De Asis} and Eric Graves and Richard S. Sutton},
  editor     = {Sarath Chandar and Razvan Pascanu and Hanie Sedghi and Doina Precup},
  title      = {Value-aware Importance Weighting for Off-policy Reinforcement Learning},
  booktitle  = {Conference on Lifelong Learning Agents, 22-25 August 2023, McGill University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {232},
  pages      = {745--763},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v232/de-asis23a.html},
  timestamp  = {Tue, 20 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/Asis0S23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SharifnassabS23,
  author     = {Arsalan Sharifnassab and Richard S. Sutton},
  editor     = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title      = {Toward Efficient Gradient-Based Value Estimation},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29 July 2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {30827--30849},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/sharifnassab23a.html},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SharifnassabS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-13757,
  author     = {Arsalan Sharifnassab and Richard S. Sutton},
  title      = {Toward Efficient Gradient-Based Value Estimation},
  journal    = {CoRR},
  volume     = {abs/2301.13757},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.13757},
  doi        = {10.48550/ARXIV.2301.13757},
  eprinttype = {arXiv},
  eprint     = {2301.13757},
  timestamp  = {Tue, 07 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-13757.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2302-05326,
  author     = {Khurram Javed and Haseeb Shah and Richard S. Sutton and Martha White},
  title      = {Online Real-Time Recurrent Learning Using Sparse Connections and Selective Learning},
  journal    = {CoRR},
  volume     = {abs/2302.05326},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.05326},
  doi        = {10.48550/ARXIV.2302.05326},
  eprinttype = {arXiv},
  eprint     = {2302.05326},
  timestamp  = {Mon, 13 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-05326.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2306-13812,
  author     = {Shibhansh Dohare and J. Fernando Hernandez{-}Garcia and Parash Rahman and Richard S. Sutton and A. Rupam Mahmood},
  title      = {Maintaining Plasticity in Deep Continual Learning},
  journal    = {CoRR},
  volume     = {abs/2306.13812},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.13812},
  doi        = {10.48550/ARXIV.2306.13812},
  eprinttype = {arXiv},
  eprint     = {2306.13812},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-13812.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The compound surname De Asis is braced so BibTeX does not treat "De" as a given name.
@article{DBLP:journals/corr/abs-2306-15625,
  author     = {Kristopher {De Asis} and Eric Graves and Richard S. Sutton},
  title      = {Value-aware Importance Weighting for Off-policy Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2306.15625},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.15625},
  doi        = {10.48550/ARXIV.2306.15625},
  eprinttype = {arXiv},
  eprint     = {2306.15625},
  timestamp  = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-15625.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2310-01569,
  author     = {Kenny Young and Richard S. Sutton},
  title      = {Iterative Option Discovery for Planning, by Planning},
  journal    = {CoRR},
  volume     = {abs/2310.01569},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.01569},
  doi        = {10.48550/ARXIV.2310.01569},
  eprinttype = {arXiv},
  eprint     = {2310.01569},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-01569.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2312-15091,
  author     = {Huizhen Yu and Yi Wan and Richard S. Sutton},
  title      = {A Note on Stability in Asynchronous Stochastic Approximation without Communication Delays},
  journal    = {CoRR},
  volume     = {abs/2312.15091},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.15091},
  doi        = {10.48550/ARXIV.2312.15091},
  eprinttype = {arXiv},
  eprint     = {2312.15091},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-15091.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/TianYS22,
  author     = {Tian Tian and Kenny Young and Richard S. Sutton},
  editor     = {Sanmi Koyejo and S. Mohamed and A. Agarwal and Danielle Belgrave and K. Cho and A. Oh},
  title      = {Doubly-Asynchronous Value Iteration: Making Value Iteration Asynchronous in Actions},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper\_files/paper/2022/hash/24e4e3234178a836b70e0aa48827e0ff-Abstract-Conference.html},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/TianYS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-03466,
  author     = {Richard S. Sutton and Marlos C. Machado and G. Zacharias Holland and David Szepesvari and Finbarr Timbers and Brian Tanner and Adam White},
  title      = {Reward-Respecting Subtasks for Model-Based Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2202.03466},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.03466},
  eprinttype = {arXiv},
  eprint     = {2202.03466},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-03466.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-09701,
  author     = {Richard S. Sutton},
  title      = {A History of Meta-gradient: Gradient Methods for Meta-learning},
  journal    = {CoRR},
  volume     = {abs/2202.09701},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.09701},
  eprinttype = {arXiv},
  eprint     = {2202.09701},
  timestamp  = {Tue, 01 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-09701.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-13252,
  author     = {Richard S. Sutton},
  title      = {The Quest for a Common Model of the Intelligent Decision Maker},
  journal    = {CoRR},
  volume     = {abs/2202.13252},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.13252},
  eprinttype = {arXiv},
  eprint     = {2202.13252},
  timestamp  = {Wed, 02 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-13252.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2205-12515,
  author     = {Yi Wan and Richard S. Sutton},
  title      = {Toward Discovering Options that Achieve Faster Planning},
  journal    = {CoRR},
  volume     = {abs/2205.12515},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.12515},
  doi        = {10.48550/ARXIV.2205.12515},
  eprinttype = {arXiv},
  eprint     = {2205.12515},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-12515.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2207-01613,
  author     = {Tian Tian and Kenny Young and Richard S. Sutton},
  title      = {Doubly-Asynchronous Value Iteration: Making Value Iteration Asynchronous in Actions},
  journal    = {CoRR},
  volume     = {abs/2207.01613},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2207.01613},
  doi        = {10.48550/ARXIV.2207.01613},
  eprinttype = {arXiv},
  eprint     = {2207.01613},
  timestamp  = {Wed, 06 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-01613.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun in the title is braced so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-2208-11173,
  author     = {Richard S. Sutton and Michael H. Bowling and Patrick M. Pilarski},
  title      = {The {Alberta} Plan for {AI} Research},
  journal    = {CoRR},
  volume     = {abs/2208.11173},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2208.11173},
  doi        = {10.48550/ARXIV.2208.11173},
  eprinttype = {arXiv},
  eprint     = {2208.11173},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2208-11173.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The acronym in the title is braced so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2209-15141,
  author     = {Yi Wan and Richard S. Sutton},
  title      = {On Convergence of Average-Reward Off-Policy Control Algorithms in Weakly-Communicating {MDPs}},
  journal    = {CoRR},
  volume     = {abs/2209.15141},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.15141},
  doi        = {10.48550/ARXIV.2209.15141},
  eprinttype = {arXiv},
  eprint     = {2209.15141},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-15141.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-14361,
  author     = {Banafsheh Rafiee and Sina Ghiassian and Jun Jin and Richard S. Sutton and Jun Luo and Adam White},
  title      = {Auxiliary task discovery through generate-and-test},
  journal    = {CoRR},
  volume     = {abs/2210.14361},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.14361},
  doi        = {10.48550/ARXIV.2210.14361},
  eprinttype = {arXiv},
  eprint     = {2210.14361},
  timestamp  = {Thu, 12 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-14361.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SilverSPS21,
  author     = {David Silver and Satinder Singh and Doina Precup and Richard S. Sutton},
  title      = {Reward is enough},
  journal    = {Artif. Intell.},
  volume     = {299},
  pages      = {103535},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.artint.2021.103535},
  doi        = {10.1016/J.ARTINT.2021.103535},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ai/SilverSPS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/automatica/LeeS21,
  author     = {Jae Young Lee and Richard S. Sutton},
  title      = {Policy iterations for reinforcement learning problems in continuous time and space - Fundamental theory and methods},
  journal    = {Autom.},
  volume     = {126},
  pages      = {109421},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.automatica.2020.109421},
  doi        = {10.1016/J.AUTOMATICA.2020.109421},
  timestamp  = {Thu, 18 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/automatica/LeeS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tsmc/BartoSA21,
  author     = {Andrew G. Barto and Richard S. Sutton and Charles W. Anderson},
  title      = {Looking Back on the Actor-Critic Architecture},
  journal    = {{IEEE} Trans. Syst. Man Cybern. Syst.},
  volume     = {51},
  number     = {1},
  pages      = {40--50},
  year       = {2021},
  url        = {https://doi.org/10.1109/TSMC.2020.3041775},
  doi        = {10.1109/TSMC.2020.3041775},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tsmc/BartoSA21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun in the title is braced so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icml/WanNS21,
  author     = {Yi Wan and Abhishek Naik and Richard S. Sutton},
  editor     = {Marina Meila and Tong Zhang},
  title      = {Learning and Planning in Average-Reward {Markov} Decision Processes},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {10653--10662},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/wan21a.html},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/WanNS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/ZhangWSW21,
  author     = {Shangtong Zhang and Yi Wan and Richard S. Sutton and Shimon Whiteson},
  editor     = {Marina Meila and Tong Zhang},
  title      = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {12578--12588},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/zhang21u.html},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/ZhangWSW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WanNS21,
  author     = {Yi Wan and Abhishek Naik and Richard S. Sutton},
  editor     = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title      = {Average-Reward Learning and Planning with Options},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages      = {22758--22769},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/c058f544c737782deacefa532d9add4c-Abstract.html},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/WanNS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2101-02808,
  author     = {Shangtong Zhang and Yi Wan and Richard S. Sutton and Shimon Whiteson},
  title      = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  journal    = {CoRR},
  volume     = {abs/2101.02808},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.02808},
  eprinttype = {arXiv},
  eprint     = {2101.02808},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-02808.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Adam" is braced in the title so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-2102-07686,
  author     = {Dylan R. Ashley and Sina Ghiassian and Richard S. Sutton},
  title      = {Does Standard Backpropagation Forget Less Catastrophically Than {Adam}?},
  journal    = {CoRR},
  volume     = {abs/2102.07686},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.07686},
  eprinttype = {arXiv},
  eprint     = {2102.07686},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-07686.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2103-05787,
  author     = {Khurram Javed and Martha White and Richard S. Sutton},
  title      = {Scalable Online Recurrent Learning Using Columnar Neural Networks},
  journal    = {CoRR},
  volume     = {abs/2103.05787},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.05787},
  eprinttype = {arXiv},
  eprint     = {2103.05787},
  timestamp  = {Tue, 16 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-05787.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-08543,
  author     = {Katya Kudashkina and Yi Wan and Abhishek Naik and Richard S. Sutton},
  title      = {Planning with Expectation Models for Control},
  journal    = {CoRR},
  volume     = {abs/2104.08543},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.08543},
  eprinttype = {arXiv},
  eprint     = {2104.08543},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-08543.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-00922,
  author     = {Sina Ghiassian and Richard S. Sutton},
  title      = {An Empirical Comparison of Off-policy Prediction Learning Algorithms on the Collision Task},
  journal    = {CoRR},
  volume     = {abs/2106.00922},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.00922},
  eprinttype = {arXiv},
  eprint     = {2106.00922},
  timestamp  = {Wed, 09 Jun 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-00922.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2108-06325,
  author     = {Shibhansh Dohare and A. Rupam Mahmood and Richard S. Sutton},
  title      = {Continual Backprop: Stochastic Gradient Descent with Persistent Randomness},
  journal    = {CoRR},
  volume     = {abs/2108.06325},
  year       = {2021},
  url        = {https://arxiv.org/abs/2108.06325},
  eprinttype = {arXiv},
  eprint     = {2108.06325},
  timestamp  = {Wed, 18 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2108-06325.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-05110,
  author     = {Sina Ghiassian and Richard S. Sutton},
  title      = {An Empirical Comparison of Off-policy Prediction Learning Algorithms in the Four Rooms Environment},
  journal    = {CoRR},
  volume     = {abs/2109.05110},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.05110},
  eprinttype = {arXiv},
  eprint     = {2109.05110},
  timestamp  = {Tue, 21 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-05110.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2110-13855,
  author     = {Yi Wan and Abhishek Naik and Richard S. Sutton},
  title      = {Average-Reward Learning and Planning with Options},
  journal    = {CoRR},
  volume     = {abs/2110.13855},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.13855},
  eprinttype = {arXiv},
  eprint     = {2110.13855},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-13855.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2112-15236,
  author     = {Amir Samani and Richard S. Sutton},
  title      = {Learning Agent State Online with Recurrent Generate-and-Test},
  journal    = {CoRR},
  volume     = {abs/2112.15236},
  year       = {2021},
  url        = {https://arxiv.org/abs/2112.15236},
  eprinttype = {arXiv},
  eprint     = {2112.15236},
  timestamp  = {Wed, 05 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2112-15236.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The quoted phrase in the title uses TeX opening/closing quotes and is braced to keep its capitalisation.
@article{DBLP:journals/jagi/MonettLTBBGBCCS20,
  author     = {Dagmar Monett and Colin W. P. Lewis and Kristinn R. Th{\'{o}}risson and Joscha Bach and Gianluca Baldassarre and Giovanni Granato and Istvan S. N. Berkeley and Fran{\c{c}}ois Chollet and Matthew Crosby and Henry Shevlin and John F. Sowa and John E. Laird and Shane Legg and Peter Lindes and Tom{\'{a}}s Mikolov and William J. Rapaport and Ra{\'{u}}l Rojas and Marek Rosa and Peter Stone and Richard S. Sutton and Roman V. Yampolskiy and Pei Wang and Roger C. Schank and Aaron Sloman and Alan F. T. Winfield},
  title      = {Special Issue {``On Defining Artificial Intelligence''} - Commentaries and Author's Response},
  journal    = {J. Artif. Gen. Intell.},
  volume     = {11},
  number     = {2},
  pages      = {1--100},
  year       = {2020},
  url        = {https://doi.org/10.2478/jagi-2020-0003},
  doi        = {10.2478/JAGI-2020-0003},
  timestamp  = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jagi/MonettLTBBGBCCS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The compound surname De Asis is braced so BibTeX does not treat "De" as a given name.
@inproceedings{DBLP:conf/aaai/AsisCPSG20,
  author     = {Kristopher {De Asis} and Alan Chan and Silviu Pitis and Richard S. Sutton and Daniel Graves},
  title      = {Fixed-Horizon Temporal Difference Methods for Stable Reinforcement Learning},
  booktitle  = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020},
  pages      = {3741--3748},
  publisher  = {{AAAI} Press},
  year       = {2020},
  url        = {https://doi.org/10.1609/aaai.v34i04.5784},
  doi        = {10.1609/AAAI.V34I04.5784},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/AsisCPSG20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The compound surname Van Roy is braced so BibTeX does not treat "Van" as a given name.
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author     = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Benjamin {Van Roy} and Richard S. Sutton and David Silver and Hado van Hasselt},
  title      = {Behaviour Suite for Reinforcement Learning},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun in the title is braced so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-2006-16318,
  author     = {Yi Wan and Abhishek Naik and Richard S. Sutton},
  title      = {Learning and Planning in Average-Reward {Markov} Decision Processes},
  journal    = {CoRR},
  volume     = {abs/2006.16318},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.16318},
  eprinttype = {arXiv},
  eprint     = {2006.16318},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-16318.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The compound surname De Asis is braced so BibTeX does not treat "De" as a given name.
@article{DBLP:journals/corr/abs-2008-11329,
  author     = {Alan Chan and Kristopher {De Asis} and Richard S. Sutton},
  title      = {Inverse Policy Evaluation for Value-based Sequential Decision-making},
  journal    = {CoRR},
  volume     = {abs/2008.11329},
  year       = {2020},
  url        = {https://arxiv.org/abs/2008.11329},
  eprinttype = {arXiv},
  eprint     = {2008.11329},
  timestamp  = {Tue, 08 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-11329.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2008-12095,
  author     = {Katya Kudashkina and Patrick M. Pilarski and Richard S. Sutton},
  title      = {Document-editing Assistants and Model-based Reinforcement Learning as a Path to Conversational {AI}},
  journal    = {CoRR},
  volume     = {abs/2008.12095},
  year       = {2020},
  url        = {https://arxiv.org/abs/2008.12095},
  eprinttype = {arXiv},
  eprint     = {2008.12095},
  timestamp  = {Tue, 15 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-12095.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-15268,
  author     = {Kenny Young and Richard S. Sutton},
  title      = {Understanding the Pathologies of Approximate Policy Evaluation when Combined with Greedification in Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2010.15268},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.15268},
  eprinttype = {arXiv},
  eprint     = {2010.15268},
  timestamp  = {Tue, 03 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-15268.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/RafieeGWS19,
  author     = {Banafsheh Rafiee and Sina Ghiassian and Adam White and Richard S. Sutton},
  editor     = {Edith Elkind and Manuela Veloso and Noa Agmon and Matthew E. Taylor},
  title      = {Prediction in Intelligence: An Empirical Comparison of Off-policy Algorithms on Robots},
  booktitle  = {Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems, {AAMAS} '19, Montreal, QC, Canada, May 13-17, 2019},
  pages      = {332--340},
  publisher  = {International Foundation for Autonomous Agents and Multiagent Systems},
  year       = {2019},
  url        = {http://dl.acm.org/citation.cfm?id=3331711},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/RafieeGWS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The editor's compound surname is braced so BibTeX keeps it together as the last name.
@inproceedings{DBLP:conf/ijcai/TianS19,
  author     = {Tian Tian and Richard S. Sutton},
  editor     = {Amal {El Fallah Seghrouchni} and David Sarne},
  title      = {Extending Sliding-Step Importance Weighting from Supervised Learning to Reinforcement Learning},
  booktitle  = {Artificial Intelligence. {IJCAI} 2019 International Workshops - Macao, China, August 10-12, 2019, Revised Selected Best Papers},
  series     = {Lecture Notes in Computer Science},
  volume     = {12158},
  pages      = {67--82},
  publisher  = {Springer},
  year       = {2019},
  url        = {https://doi.org/10.1007/978-3-030-56150-5\_4},
  doi        = {10.1007/978-3-030-56150-5\_4},
  timestamp  = {Thu, 16 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/TianS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/WanZWWS19,
  author     = {Yi Wan and Muhammad Zaheer and Adam White and Martha White and Richard S. Sutton},
  editor     = {Sarit Kraus},
  title      = {Planning with Expectation Models},
  booktitle  = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16, 2019},
  pages      = {3649--3655},
  publisher  = {ijcai.org},
  year       = {2019},
  url        = {https://doi.org/10.24963/ijcai.2019/506},
  doi        = {10.24963/IJCAI.2019/506},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ijcai/WanZWWS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-07510,
  author     = {J. Fernando Hernandez{-}Garcia and Richard S. Sutton},
  title      = {Understanding Multi-Step Deep Reinforcement Learning: {A} Systematic Study of the {DQN} Target},
  journal    = {CoRR},
  volume     = {abs/1901.07510},
  year       = {2019},
  url        = {http://arxiv.org/abs/1901.07510},
  eprinttype = {arXiv},
  eprint     = {1901.07510},
  timestamp  = {Sat, 02 Feb 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-07510.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1903-00194,
  author     = {Xiang Gu and Sina Ghiassian and Richard S. Sutton},
  title      = {Should All Temporal Difference Learning Use Emphasis?},
  journal    = {CoRR},
  volume     = {abs/1903.00194},
  year       = {2019},
  url        = {http://arxiv.org/abs/1903.00194},
  eprinttype = {arXiv},
  eprint     = {1903.00194},
  timestamp  = {Tue, 31 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1903-00194.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1903-03252,
  author     = {Alexandra Kearney and Vivek Veeriah and Jaden B. Travnik and Patrick M. Pilarski and Richard S. Sutton},
  title      = {Learning Feature Relevance Through Step Size Adaptation in Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1903.03252},
  year       = {2019},
  url        = {http://arxiv.org/abs/1903.03252},
  eprinttype = {arXiv},
  eprint     = {1903.03252},
  timestamp  = {Sun, 31 Mar 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1903-03252.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1904-01191,
  author     = {Yi Wan and Muhammad Zaheer and Adam White and Martha White and Richard S. Sutton},
  title      = {Planning with Expectation Models},
  journal    = {CoRR},
  volume     = {abs/1904.01191},
  year       = {2019},
  url        = {http://arxiv.org/abs/1904.01191},
  eprinttype = {arXiv},
  eprint     = {1904.01191},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-01191.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1908.03568. The compound surname Van Roy is brace-grouped so
% that BibTeX does not read the capitalised Van as part of the given name.
@article{DBLP:journals/corr/abs-1908-03568,
  author     = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Benjamin {Van Roy} and Richard S. Sutton and David Silver and Hado van Hasselt},
  title      = {Behaviour Suite for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1908.03568},
  year       = {2019},
  url        = {http://arxiv.org/abs/1908.03568},
  eprinttype = {arXiv},
  eprint     = {1908.03568},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1909.03906. The compound surname De Asis is brace-grouped so
% that BibTeX does not read the capitalised De as part of the given name.
@article{DBLP:journals/corr/abs-1909-03906,
  author     = {Kristopher {De Asis} and Alan Chan and Silviu Pitis and Richard S. Sutton and Daniel Graves},
  title      = {Fixed-Horizon Temporal Difference Methods for Stable Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1909.03906},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.03906},
  eprinttype = {arXiv},
  eprint     = {1909.03906},
  timestamp  = {Tue, 08 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-03906.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1910-02140,
  author     = {Abhishek Naik and Roshan Shariff and Niko Yasui and Richard S. Sutton},
  title      = {Discounted Reinforcement Learning is Not an Optimization Problem},
  journal    = {CoRR},
  volume     = {abs/1910.02140},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.02140},
  eprinttype = {arXiv},
  eprint     = {1910.02140},
  timestamp  = {Wed, 09 Oct 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-02140.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1912-04002,
  author     = {J. Fernando Hernandez{-}Garcia and Richard S. Sutton},
  title      = {Learning Sparse Representations Incrementally in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1912.04002},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.04002},
  eprinttype = {arXiv},
  eprint     = {1912.04002},
  timestamp  = {Thu, 02 Jan 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-04002.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/firai/TravnikMSP18,
  author    = {Jaden B. Travnik and Kory W. Mathewson and Richard S. Sutton and Patrick M. Pilarski},
  title     = {Reactive Reinforcement Learning in Asynchronous Environments},
  journal   = {Frontiers Robotics {AI}},
  volume    = {5},
  pages     = {79},
  year      = {2018},
  url       = {https://doi.org/10.3389/frobt.2018.00079},
  doi       = {10.3389/FROBT.2018.00079},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/firai/TravnikMSP18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% JMLR volume 19 article. The proper noun Bellman is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@article{DBLP:journals/jmlr/YuMS18,
  author    = {Huizhen Yu and Ashique Rupam Mahmood and Richard S. Sutton},
  title     = {On Generalized {Bellman} Equations and Temporal-Difference Learning},
  journal   = {J. Mach. Learn. Res.},
  volume    = {19},
  pages     = {48:1--48:49},
  year      = {2018},
  url       = {http://jmlr.org/papers/v19/17-283.html},
  timestamp = {Wed, 10 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/YuMS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% AAAI-18 paper. The compound surname De Asis is brace-grouped so that BibTeX
% does not read the capitalised De as part of the given name; the booktitle
% capitalises Innovative as part of the IAAI conference name.
@inproceedings{DBLP:conf/aaai/AsisHHS18,
  author    = {Kristopher {De Asis} and J. Fernando Hernandez{-}Garcia and G. Zacharias Holland and Richard S. Sutton},
  editor    = {Sheila A. McIlraith and Kilian Q. Weinberger},
  title     = {Multi-Step Reinforcement Learning: {A} Unifying Algorithm},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence, (AAAI-18), the 30th Innovative Applications of Artificial Intelligence (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2-7, 2018},
  pages     = {2902--2909},
  publisher = {{AAAI} Press},
  year      = {2018},
  url       = {https://doi.org/10.1609/aaai.v32i1.11631},
  doi       = {10.1609/AAAI.V32I1.11631},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/AsisHHS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/uai/SherstanABYWWS18,
  author    = {Craig Sherstan and Dylan R. Ashley and Brendan Bennett and Kenny Young and Adam White and Martha White and Richard S. Sutton},
  editor    = {Amir Globerson and Ricardo Silva},
  title     = {Comparing Direct and Indirect Temporal-Difference Methods for Estimating the Variance of the Return},
  booktitle = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10, 2018},
  pages     = {63--72},
  publisher = {{AUAI} Press},
  year      = {2018},
  url       = {http://auai.org/uai2018/proceedings/papers/35.pdf},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/uai/SherstanABYWWS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% UAI 2018 paper. The compound surname De Asis is brace-grouped so that BibTeX
% does not read the capitalised De as part of the given name.
@inproceedings{DBLP:conf/uai/AsisS18,
  author    = {Kristopher {De Asis} and Richard S. Sutton},
  editor    = {Amir Globerson and Ricardo Silva},
  title     = {Per-decision Multi-step Temporal Difference Learning with Control Variates},
  booktitle = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10, 2018},
  pages     = {786--794},
  publisher = {{AUAI} Press},
  year      = {2018},
  url       = {http://auai.org/uai2018/proceedings/papers/282.pdf},
  timestamp = {Thu, 12 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/uai/AsisS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1801-08287,
  author     = {Craig Sherstan and Brendan Bennett and Kenny Young and Dylan R. Ashley and Adam White and Martha White and Richard S. Sutton},
  title      = {Directly Estimating the Variance of the {\(\lambda\)}-Return Using Temporal-Difference Methods},
  journal    = {CoRR},
  volume     = {abs/1801.08287},
  year       = {2018},
  url        = {http://arxiv.org/abs/1801.08287},
  eprinttype = {arXiv},
  eprint     = {1801.08287},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1801-08287.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1802-06139,
  author     = {Jaden B. Travnik and Kory W. Mathewson and Richard S. Sutton and Patrick M. Pilarski},
  title      = {Reactive Reinforcement Learning in Asynchronous Environments},
  journal    = {CoRR},
  volume     = {abs/1802.06139},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.06139},
  eprinttype = {arXiv},
  eprint     = {1802.06139},
  timestamp  = {Fri, 17 Dec 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-06139.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1804-03334,
  author     = {Alexandra Kearney and Vivek Veeriah and Jaden B. Travnik and Richard S. Sutton and Patrick M. Pilarski},
  title      = {{TIDBD:} Adapting Temporal-difference Step-sizes Through Stochastic Meta-descent},
  journal    = {CoRR},
  volume     = {abs/1804.03334},
  year       = {2018},
  url        = {http://arxiv.org/abs/1804.03334},
  eprinttype = {arXiv},
  eprint     = {1804.03334},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1804-03334.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1805-07476,
  author     = {Sina Ghiassian and Huizhen Yu and Banafsheh Rafiee and Richard S. Sutton},
  title      = {Two geometric input transformation methods for fast online reinforcement learning with neural nets},
  journal    = {CoRR},
  volume     = {abs/1805.07476},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.07476},
  eprinttype = {arXiv},
  eprint     = {1805.07476},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-07476.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1806-00540,
  author     = {Kenny J. Young and Richard S. Sutton and Shuo Yang},
  title      = {Integrating Episodic Memory into a Reinforcement Learning Agent using Reservoir Sampling},
  journal    = {CoRR},
  volume     = {abs/1806.00540},
  year       = {2018},
  url        = {http://arxiv.org/abs/1806.00540},
  eprinttype = {arXiv},
  eprint     = {1806.00540},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1806-00540.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1807.01830. The compound surname De Asis is brace-grouped so
% that BibTeX does not read the capitalised De as part of the given name.
@article{DBLP:journals/corr/abs-1807-01830,
  author     = {Kristopher {De Asis} and Richard S. Sutton},
  title      = {Per-decision Multi-step Temporal Difference Learning with Control Variates},
  journal    = {CoRR},
  volume     = {abs/1807.01830},
  year       = {2018},
  url        = {http://arxiv.org/abs/1807.01830},
  eprinttype = {arXiv},
  eprint     = {1807.01830},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1807-01830.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1809.07435. The compound surname De Asis is brace-grouped so
% that BibTeX does not read the capitalised De as part of the given name.
@article{DBLP:journals/corr/abs-1809-07435,
  author     = {Kristopher {De Asis} and Brendan Bennett and Richard S. Sutton},
  title      = {Predicting Periodicity with Temporal Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1809.07435},
  year       = {2018},
  url        = {http://arxiv.org/abs/1809.07435},
  eprinttype = {arXiv},
  eprint     = {1809.07435},
  timestamp  = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1809-07435.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1811-02597,
  author     = {Sina Ghiassian and Andrew Patterson and Martha White and Richard S. Sutton and Adam White},
  title      = {Online Off-policy Prediction},
  journal    = {CoRR},
  volume     = {abs/1811.02597},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.02597},
  eprinttype = {arXiv},
  eprint     = {1811.02597},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-02597.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Canadian AI 2017 paper in LNCS 10233. The proper noun Bellman is
% brace-protected against sentence-casing styles, and the publisher is given
% as Springer like the other Lecture Notes in Computer Science entries.
@inproceedings{DBLP:conf/ai/YuMS17,
  author    = {Huizhen Yu and Ashique Rupam Mahmood and Richard S. Sutton},
  editor    = {Malek Mouhoub and Philippe Langlais},
  title     = {On Generalized {Bellman} Equations and Temporal-Difference Learning},
  booktitle = {Advances in Artificial Intelligence - 30th Canadian Conference on Artificial Intelligence, Canadian {AI} 2017, Edmonton, AB, Canada, May 16-19, 2017, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {10233},
  pages     = {3--14},
  publisher = {Springer},
  year      = {2017},
  url       = {https://doi.org/10.1007/978-3-319-57351-9\_1},
  doi       = {10.1007/978-3-319-57351-9\_1},
  timestamp = {Thu, 28 Sep 2023 12:27:16 +0200},
  biburl    = {https://dblp.org/rec/conf/ai/YuMS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/atal/VeeriahSS17,
  author    = {Vivek Veeriah and Harm van Seijen and Richard S. Sutton},
  editor    = {Kate Larson and Michael Winikoff and Sanmay Das and Edmund H. Durfee},
  title     = {Forward Actor-Critic for Nonlinear Function Approximation in Reinforcement Learning},
  booktitle = {Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems, {AAMAS} 2017, S{\~{a}}o Paulo, Brazil, May 8-12, 2017},
  pages     = {556--564},
  publisher = {{ACM}},
  year      = {2017},
  url       = {http://dl.acm.org/citation.cfm?id=3091207},
  timestamp = {Wed, 27 Sep 2017 07:24:00 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/VeeriahSS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/pkdd/VeeriahZS17,
  author    = {Vivek Veeriah and Shangtong Zhang and Richard S. Sutton},
  editor    = {Michelangelo Ceci and Jaakko Hollm{\'{e}}n and Ljupco Todorovski and Celine Vens and Saso Dzeroski},
  title     = {Crossprop: Learning Representations by Stochastic Meta-Gradient Descent in Neural Networks},
  booktitle = {Machine Learning and Knowledge Discovery in Databases - European Conference, {ECML} {PKDD} 2017, Skopje, Macedonia, September 18-22, 2017, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {10534},
  pages     = {445--459},
  publisher = {Springer},
  year      = {2017},
  url       = {https://doi.org/10.1007/978-3-319-71249-9\_27},
  doi       = {10.1007/978-3-319-71249-9\_27},
  timestamp = {Tue, 14 May 2019 10:00:47 +0200},
  biburl    = {https://dblp.org/rec/conf/pkdd/VeeriahZS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/MahmoodYS17,
  author     = {Ashique Rupam Mahmood and Huizhen Yu and Richard S. Sutton},
  title      = {Multi-step Off-policy Learning Without Importance Sampling Ratios},
  journal    = {CoRR},
  volume     = {abs/1702.03006},
  year       = {2017},
  url        = {http://arxiv.org/abs/1702.03006},
  eprinttype = {arXiv},
  eprint     = {1702.03006},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MahmoodYS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1703.01327. The compound surname De Asis is brace-grouped so
% that BibTeX does not read the capitalised De as part of the given name.
@article{DBLP:journals/corr/AsisHHS17,
  author     = {Kristopher {De Asis} and J. Fernando Hernandez{-}Garcia and G. Zacharias Holland and Richard S. Sutton},
  title      = {Multi-step Reinforcement Learning: {A} Unifying Algorithm},
  journal    = {CoRR},
  volume     = {abs/1703.01327},
  year       = {2017},
  url        = {http://arxiv.org/abs/1703.01327},
  eprinttype = {arXiv},
  eprint     = {1703.01327},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/AsisHHS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1704.04463. The proper noun Bellman is brace-protected so
% that sentence-casing bibliography styles do not lowercase it.
@article{DBLP:journals/corr/YuMS17,
  author     = {Huizhen Yu and Ashique Rupam Mahmood and Richard S. Sutton},
  title      = {On Generalized {Bellman} Equations and Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1704.04463},
  year       = {2017},
  url        = {http://arxiv.org/abs/1704.04463},
  eprinttype = {arXiv},
  eprint     = {1704.04463},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/YuMS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/LeeS17a,
  author     = {Jae Young Lee and Richard S. Sutton},
  title      = {Integral Policy Iterations for Reinforcement Learning Problems in Continuous Time and Space},
  journal    = {CoRR},
  volume     = {abs/1705.03520},
  year       = {2017},
  url        = {http://arxiv.org/abs/1705.03520},
  eprinttype = {arXiv},
  eprint     = {1705.03520},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/LeeS17a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1705.03967. The lambda in the title is typeset in math mode
% without literal dollar signs around it, and the acronym GQ is brace-protected
% against sentence-casing styles.
@article{DBLP:journals/corr/WhiteS17,
  author     = {Adam White and Richard S. Sutton},
  title      = {{GQ}({\(\lambda\)}) Quick Reference and Implementation Guide},
  journal    = {CoRR},
  volume     = {abs/1705.03967},
  year       = {2017},
  url        = {http://arxiv.org/abs/1705.03967},
  eprinttype = {arXiv},
  eprint     = {1705.03967},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/WhiteS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/GhiassianRS17,
  author     = {Sina Ghiassian and Banafsheh Rafiee and Richard S. Sutton},
  title      = {A First Empirical Study of Emphatic Temporal Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1705.04185},
  year       = {2017},
  url        = {http://arxiv.org/abs/1705.04185},
  eprinttype = {arXiv},
  eprint     = {1705.04185},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GhiassianRS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1711-03676,
  author     = {Patrick M. Pilarski and Richard S. Sutton and Kory W. Mathewson and Craig Sherstan and Adam S. R. Parker and Ann L. Edwards},
  title      = {Communicative Capital for Prosthetic Agents},
  journal    = {CoRR},
  volume     = {abs/1711.03676},
  year       = {2017},
  url        = {http://arxiv.org/abs/1711.03676},
  eprinttype = {arXiv},
  eprint     = {1711.03676},
  timestamp  = {Fri, 17 Dec 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1711-03676.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1712-01275,
  author     = {Shangtong Zhang and Richard S. Sutton},
  title      = {A Deeper Look at Experience Replay},
  journal    = {CoRR},
  volume     = {abs/1712.01275},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.01275},
  eprinttype = {arXiv},
  eprint     = {1712.01275},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-01275.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jmlr/SuttonMW16,
  author    = {Richard S. Sutton and Ashique Rupam Mahmood and Martha White},
  title     = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference Learning},
  journal   = {J. Mach. Learn. Res.},
  volume    = {17},
  pages     = {73:1--73:29},
  year      = {2016},
  url       = {http://jmlr.org/papers/v17/14-488.html},
  timestamp = {Wed, 10 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/SuttonMW16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jmlr/SeijenMPMS16,
  author    = {Harm van Seijen and Ashique Rupam Mahmood and Patrick M. Pilarski and Marlos C. Machado and Richard S. Sutton},
  title     = {True Online Temporal-Difference Learning},
  journal   = {J. Mach. Learn. Res.},
  volume    = {17},
  pages     = {145:1--145:40},
  year      = {2016},
  url       = {http://jmlr.org/papers/v17/15-599.html},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/SeijenMPMS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/VeeriahPS16,
  author     = {Vivek Veeriah and Patrick M. Pilarski and Richard S. Sutton},
  title      = {Face valuing: Training user interfaces with facial expressions and reinforcement learning},
  journal    = {CoRR},
  volume     = {abs/1606.02807},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.02807},
  eprinttype = {arXiv},
  eprint     = {1606.02807},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/VeeriahPS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/MurphyDLMSW16,
  author     = {Susan A. Murphy and Yanzhen Deng and Eric B. Laber and Hamid Reza Maei and Richard S. Sutton and Katie Witkiewitz},
  title      = {A Batch, Off-Policy, Actor-Critic Algorithm for Optimizing the Average Reward},
  journal    = {CoRR},
  volume     = {abs/1607.05047},
  year       = {2016},
  url        = {http://arxiv.org/abs/1607.05047},
  eprinttype = {arXiv},
  eprint     = {1607.05047},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MurphyDLMSW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/SuttonV16,
  author     = {Richard S. Sutton and Vivek Veeriah},
  title      = {Learning representations through stochastic gradient descent in cross-validation error},
  journal    = {CoRR},
  volume     = {abs/1612.02879},
  year       = {2016},
  url        = {http://arxiv.org/abs/1612.02879},
  eprinttype = {arXiv},
  eprint     = {1612.02879},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonV16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2015 paper. The first author is written as Harm van Seijen, the
% spelling used by the other records of this author in this file, so the name
% sorts and abbreviates consistently. NOTE(review): the citation key and URL
% keep the Vanseijen spelling; confirm the name against the proceedings.
@inproceedings{DBLP:conf/icml/VanseijenS15,
  author    = {Harm van Seijen and Richard S. Sutton},
  editor    = {Francis R. Bach and David M. Blei},
  title     = {A Deeper Look at Planning as Learning from Replay},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning, {ICML} 2015, Lille, France, 6-11 July 2015},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {37},
  pages     = {2314--2322},
  publisher = {JMLR.org},
  year      = {2015},
  url       = {http://proceedings.mlr.press/v37/vanseijen15.html},
  timestamp = {Thu, 28 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/VanseijenS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/uai/MahmoodS15,
  author    = {Ashique Rupam Mahmood and Richard S. Sutton},
  editor    = {Marina Meila and Tom Heskes},
  title     = {Off-policy learning based on weighted importance sampling with linear computational complexity},
  booktitle = {Proceedings of the Thirty-First Conference on Uncertainty in Artificial Intelligence, {UAI} 2015, July 12-16, 2015, Amsterdam, The Netherlands},
  pages     = {552--561},
  publisher = {{AUAI} Press},
  year      = {2015},
  url       = {http://auai.org/uai2015/proceedings/papers/165.pdf},
  timestamp = {Thu, 12 Mar 2020 11:31:09 +0100},
  biburl    = {https://dblp.org/rec/conf/uai/MahmoodS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/SuttonMW15,
  author     = {Richard S. Sutton and Ashique Rupam Mahmood and Martha White},
  title      = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1503.04269},
  year       = {2015},
  url        = {http://arxiv.org/abs/1503.04269},
  eprinttype = {arXiv},
  eprint     = {1503.04269},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonMW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/SuttonT15,
  author     = {Richard S. Sutton and Brian Tanner},
  title      = {Temporal-Difference Networks},
  journal    = {CoRR},
  volume     = {abs/1504.05539},
  year       = {2015},
  url        = {http://arxiv.org/abs/1504.05539},
  eprinttype = {arXiv},
  eprint     = {1504.05539},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonT15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1507.00353. The acronym TD is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@article{DBLP:journals/corr/SeijenMPS15,
  author     = {Harm van Seijen and Ashique Rupam Mahmood and Patrick M. Pilarski and Richard S. Sutton},
  title      = {An Empirical Evaluation of True Online {TD}({\(\lambda\)})},
  journal    = {CoRR},
  volume     = {abs/1507.00353},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.00353},
  eprinttype = {arXiv},
  eprint     = {1507.00353},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SeijenMPS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/MahmoodYWS15,
  author     = {Ashique Rupam Mahmood and Huizhen Yu and Martha White and Richard S. Sutton},
  title      = {Emphatic Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1507.01569},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.01569},
  eprinttype = {arXiv},
  eprint     = {1507.01569},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MahmoodYWS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1507.07147. The acronym TD is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@article{DBLP:journals/corr/Sutton15,
  author     = {Richard S. Sutton},
  title      = {True Online Emphatic {TD}({\(\lambda\)}): Quick Reference and Implementation Guide},
  journal    = {CoRR},
  volume     = {abs/1507.07147},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.07147},
  eprinttype = {arXiv},
  eprint     = {1507.07147},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/Sutton15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/HasseltS15,
  author     = {Hado van Hasselt and Richard S. Sutton},
  title      = {Learning to Predict Independent of Span},
  journal    = {CoRR},
  volume     = {abs/1508.04582},
  year       = {2015},
  url        = {http://arxiv.org/abs/1508.04582},
  eprinttype = {arXiv},
  eprint     = {1508.04582},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/SeijenMPMS15,
  author     = {Harm van Seijen and Ashique Rupam Mahmood and Patrick M. Pilarski and Marlos C. Machado and Richard S. Sutton},
  title      = {True Online Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1512.04087},
  year       = {2015},
  url        = {http://arxiv.org/abs/1512.04087},
  eprinttype = {arXiv},
  eprint     = {1512.04087},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SeijenMPMS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/adb/ModayilWS14,
  author    = {Joseph Modayil and Adam White and Richard S. Sutton},
  title     = {Multi-timescale nexting in a reinforcement learning robot},
  journal   = {Adapt. Behav.},
  volume    = {22},
  number    = {2},
  pages     = {146--160},
  year      = {2014},
  url       = {https://doi.org/10.1177/1059712313511648},
  doi       = {10.1177/1059712313511648},
  timestamp = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/adb/ModayilWS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2014 paper. Q and Monte Carlo are brace-protected against
% sentence-casing styles, lambda is typeset in math mode, and the booktitle
% uses the ordinal 31st.
@inproceedings{DBLP:conf/icml/SuttonMPH14,
  author    = {Richard S. Sutton and Ashique Rupam Mahmood and Doina Precup and Hado van Hasselt},
  title     = {A new {Q}({\(\lambda\)}) with interim forward view and {Monte Carlo} equivalence},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning, {ICML} 2014, Beijing, China, 21-26 June 2014},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {32},
  pages     = {568--576},
  publisher = {JMLR.org},
  year      = {2014},
  url       = {http://proceedings.mlr.press/v32/sutton14.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonMPH14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2014 paper. The acronym TD is brace-protected against sentence-casing
% styles, lambda is typeset in math mode, and the booktitle uses the ordinal
% 31st.
@inproceedings{DBLP:conf/icml/SeijenS14,
  author    = {Harm van Seijen and Richard S. Sutton},
  title     = {True Online {TD}({\(\lambda\)})},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning, {ICML} 2014, Beijing, China, 21-26 June 2014},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {32},
  pages     = {692--700},
  publisher = {JMLR.org},
  year      = {2014},
  url       = {http://proceedings.mlr.press/v32/seijen14.html},
  timestamp = {Wed, 29 May 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SeijenS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/YaoSSMB14,
  author    = {Hengshuai Yao and Csaba Szepesv{\'{a}}ri and Richard S. Sutton and Joseph Modayil and Shalabh Bhatnagar},
  editor    = {Zoubin Ghahramani and Max Welling and Corinna Cortes and Neil D. Lawrence and Kilian Q. Weinberger},
  title     = {Universal Option Models},
  booktitle = {Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages     = {990--998},
  year      = {2014},
  url       = {https://proceedings.neurips.cc/paper/2014/hash/996a7fa078cc36c46d02f9af3bef918b-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/YaoSSMB14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/MahmoodHS14,
  author    = {Ashique Rupam Mahmood and Hado van Hasselt and Richard S. Sutton},
  editor    = {Zoubin Ghahramani and Max Welling and Corinna Cortes and Neil D. Lawrence and Kilian Q. Weinberger},
  title     = {Weighted importance sampling for off-policy learning with linear function approximation},
  booktitle = {Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages     = {3014--3022},
  year      = {2014},
  url       = {https://proceedings.neurips.cc/paper/2014/hash/be53ee61104935234b174e62a07e53cf-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/MahmoodHS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% UAI 2014 paper. The acronym TD is brace-protected and lambda is typeset in
% math mode. The URL query parameters are separated by single escaped
% ampersands, with no HTML character-entity residue.
@inproceedings{DBLP:conf/uai/HasseltMS14,
  author    = {Hado van Hasselt and Ashique Rupam Mahmood and Richard S. Sutton},
  editor    = {Nevin L. Zhang and Jin Tian},
  title     = {Off-policy {TD}({\(\lambda\)}) with a true online equivalence},
  booktitle = {Proceedings of the Thirtieth Conference on Uncertainty in Artificial Intelligence, {UAI} 2014, Quebec City, Quebec, Canada, July 23-27, 2014},
  pages     = {330--339},
  publisher = {{AUAI} Press},
  year      = {2014},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2468\&proceeding\_id=30},
  timestamp = {Wed, 03 Feb 2021 11:09:27 +0100},
  biburl    = {https://dblp.org/rec/conf/uai/HasseltMS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ram/PilarskiDDCCHS13,
  author    = {Patrick M. Pilarski and Michael Rory Dawson and Thomas Degris and Jason P. Carey and K. Ming Chan and Jacqueline S. Hebert and Richard S. Sutton},
  title     = {Adaptive Artificial Limbs: {A} Real-Time Approach to Prediction and Anticipation},
  journal   = {{IEEE} Robotics Autom. Mag.},
  volume    = {20},
  number    = {1},
  pages     = {53--64},
  year      = {2013},
  url       = {https://doi.org/10.1109/MRA.2012.2229948},
  doi       = {10.1109/MRA.2012.2229948},
  timestamp = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ram/PilarskiDDCCHS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/MahmoodS13,
  author    = {Ashique Rupam Mahmood and Richard S. Sutton},
  title     = {Representation Search through Generate and Test},
  booktitle = {Learning Rich Representations from Low-Level Sensors, Papers from the 2013 {AAAI} Workshop, Bellevue, Washington, USA, July 15, 2013},
  series    = {{AAAI} Technical Report},
  volume    = {{WS-13-12}},
  publisher = {{AAAI}},
  year      = {2013},
  url       = {http://www.aaai.org/ocs/index.php/WS/AAAIW13/paper/view/7164},
  timestamp = {Tue, 05 Sep 2023 08:59:27 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/MahmoodS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICAPS 2013 paper. Go, the name of the game, is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{DBLP:conf/aips/SilverSM13,
  author    = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  editor    = {Daniel Borrajo and Subbarao Kambhampati and Angelo Oddi and Simone Fratini},
  title     = {Temporal-Difference Search in Computer {Go}},
  booktitle = {Proceedings of the Twenty-Third International Conference on Automated Planning and Scheduling, {ICAPS} 2013, Rome, Italy, June 10-14, 2013},
  publisher = {{AAAI}},
  year      = {2013},
  url       = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS13/paper/view/6037},
  timestamp = {Wed, 29 Mar 2017 16:45:27 +0200},
  biburl    = {https://dblp.org/rec/conf/aips/SilverSM13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/SeijenS13,
  author    = {Harm van Seijen and Richard S. Sutton},
  title     = {Planning by Prioritized Sweeping with Small Backups},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning, {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {28},
  pages     = {361--369},
  publisher = {JMLR.org},
  year      = {2013},
  url       = {http://proceedings.mlr.press/v28/vanseijen13.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SeijenS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icorr/PilarskiDS13,
  author     = {Patrick M. Pilarski and Travis B. Dick and Richard S. Sutton},
  title      = {Real-time prediction learning for the simultaneous actuation of multiple prosthetic joints},
  booktitle  = {{IEEE} 13th International Conference on Rehabilitation Robotics, {ICORR} 2013, Seattle, WA, USA, June 24-26, 2013},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2013},
  url        = {https://doi.org/10.1109/ICORR.2013.6650435},
  doi        = {10.1109/ICORR.2013.6650435},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icorr/PilarskiDS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sara/MahmoodS13,
  author     = {Ashique Rupam Mahmood and Richard S. Sutton},
  editor     = {Alan M. Frisch and Peter Gregory},
  title      = {Position Paper: Representation Search through Generate and Test},
  booktitle  = {Proceedings of the Tenth Symposium on Abstraction, Reformulation, and Approximation, {SARA} 2013, 11-12 July 2013, Leavenworth, Washington, {USA}},
  publisher  = {{AAAI}},
  year       = {2013},
  url        = {http://www.aaai.org/ocs/index.php/SARA/SARA13/paper/view/7255},
  timestamp  = {Tue, 09 Feb 2021 08:32:52 +0100},
  biburl     = {https://dblp.org/rec/conf/sara/MahmoodS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1301-2343,
  author     = {Harm van Seijen and Richard S. Sutton},
  title      = {Planning by Prioritized Sweeping with Small Backups},
  journal    = {CoRR},
  volume     = {abs/1301.2343},
  year       = {2013},
  url        = {http://arxiv.org/abs/1301.2343},
  eprinttype = {arXiv},
  eprint     = {1301.2343},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-2343.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/EdwardsKDSP13,
  author     = {Ann L. Edwards and Alexandra Kearney and Michael Rory Dawson and Richard S. Sutton and Patrick M. Pilarski},
  title      = {Temporal-Difference Learning to Assist Human Decision Making during the Control of an Artificial Limb},
  journal    = {CoRR},
  volume     = {abs/1309.4714},
  year       = {2013},
  url        = {http://arxiv.org/abs/1309.4714},
  eprinttype = {arXiv},
  eprint     = {1309.4714},
  timestamp  = {Tue, 17 Sep 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/EdwardsKDSP13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Go" in the title is braced so that sentence-casing styles keep the game's name capitalised.
@article{DBLP:journals/ml/SilverSM12,
  author     = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  title      = {Temporal-difference search in computer {Go}},
  journal    = {Mach. Learn.},
  volume     = {87},
  number     = {2},
  pages      = {183--219},
  year       = {2012},
  url        = {https://doi.org/10.1007/s10994-012-5280-0},
  doi        = {10.1007/S10994-012-5280-0},
  timestamp  = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ml/SilverSM12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaaifs/PilarskiS12,
  author     = {Patrick M. Pilarski and Richard S. Sutton},
  title      = {Between Instruction and Reward: Human-Prompted Switching},
  booktitle  = {Robots Learning Interactively from Human Teachers, Papers from the 2012 {AAAI} Fall Symposium, Arlington, Virginia, USA, November 2-4, 2012},
  series     = {{AAAI} Technical Report},
  volume     = {{FS-12-07}},
  publisher  = {{AAAI}},
  year       = {2012},
  url        = {http://www.aaai.org/ocs/index.php/FSS/FSS12/paper/view/5496},
  timestamp  = {Tue, 08 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/aaaifs/PilarskiS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/amcc/DegrisPS12,
  author     = {Thomas Degris and Patrick M. Pilarski and Richard S. Sutton},
  title      = {Model-Free reinforcement learning with continuous action in practice},
  booktitle  = {American Control Conference, {ACC} 2012, Montreal, QC, Canada, June 27-29, 2012},
  pages      = {2177--2182},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ACC.2012.6315022},
  doi        = {10.1109/ACC.2012.6315022},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/amcc/DegrisPS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/MahmoodSDP12,
  author     = {Ashique Rupam Mahmood and Richard S. Sutton and Thomas Degris and Patrick M. Pilarski},
  title      = {Tuning-free step-size adaptation},
  booktitle  = {2012 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2012, Kyoto, Japan, March 25-30, 2012},
  pages      = {2121--2124},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ICASSP.2012.6288330},
  doi        = {10.1109/ICASSP.2012.6288330},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/MahmoodSDP12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdl-epirob/WhiteMS12,
  author     = {Adam White and Joseph Modayil and Richard S. Sutton},
  title      = {Scaling life-long off-policy learning},
  booktitle  = {2012 {IEEE} International Conference on Development and Learning and Epigenetic Robotics, {ICDL-EPIROB} 2012, San Diego, CA, USA, November 7-9, 2012},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/DevLrn.2012.6400860},
  doi        = {10.1109/DEVLRN.2012.6400860},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icdl-epirob/WhiteMS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/DegrisWS12,
  author     = {Thomas Degris and Martha White and Richard S. Sutton},
  title      = {Linear Off-Policy Actor-Critic},
  booktitle  = {Proceedings of the 29th International Conference on Machine Learning, {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher  = {icml.cc / Omnipress},
  year       = {2012},
  url        = {http://icml.cc/2012/papers/268.pdf},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/DegrisWS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sab/ModayilWS12,
  author     = {Joseph Modayil and Adam White and Richard S. Sutton},
  editor     = {Tom Ziemke and Christian Balkenius and John Hallam},
  title      = {Multi-timescale Nexting in a Reinforcement Learning Robot},
  booktitle  = {From Animals to Animats 12 - 12th International Conference on Simulation of Adaptive Behavior, {SAB} 2012, Odense, Denmark, August 27-30, 2012. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {7426},
  pages      = {299--309},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-642-33093-3\_30},
  doi        = {10.1007/978-3-642-33093-3\_30},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sab/ModayilWS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/smc/ModayilWPS12,
  author     = {Joseph Modayil and Adam White and Patrick M. Pilarski and Richard S. Sutton},
  title      = {Acquiring a broad range of empirical knowledge in real time by temporal-difference learning},
  booktitle  = {Proceedings of the {IEEE} International Conference on Systems, Man, and Cybernetics, {SMC} 2012, Seoul, Korea (South), October 14-17, 2012},
  pages      = {1903--1910},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ICSMC.2012.6378016},
  doi        = {10.1109/ICSMC.2012.6378016},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/smc/ModayilWPS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1205-4839,
  author     = {Thomas Degris and Martha White and Richard S. Sutton},
  title      = {Off-Policy Actor-Critic},
  journal    = {CoRR},
  volume     = {abs/1205.4839},
  year       = {2012},
  url        = {http://arxiv.org/abs/1205.4839},
  eprinttype = {arXiv},
  eprint     = {1205.4839},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1205-4839.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Dyna" in the title is braced so that sentence-casing styles keep the architecture's name capitalised.
@article{DBLP:journals/corr/abs-1206-3285,
  author     = {Richard S. Sutton and Csaba Szepesv{\'{a}}ri and Alborz Geramifard and Michael Bowling},
  title      = {{Dyna}-Style Planning with Linear Function Approximation and Prioritized Sweeping},
  journal    = {CoRR},
  volume     = {abs/1206.3285},
  year       = {2012},
  url        = {http://arxiv.org/abs/1206.3285},
  eprinttype = {arXiv},
  eprint     = {1206.3285},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1206-3285.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1206-6262,
  author     = {Adam White and Joseph Modayil and Richard S. Sutton},
  title      = {Scaling Life-long Off-policy Learning},
  journal    = {CoRR},
  volume     = {abs/1206.6262},
  year       = {2012},
  url        = {http://arxiv.org/abs/1206.6262},
  eprinttype = {arXiv},
  eprint     = {1206.6262},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1206-6262.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The url previously carried a mangled HTML entity for the ampersand plus per-session
% CFID/CFTOKEN query parameters; only the stable citation id is kept.
@inproceedings{DBLP:conf/atal/SuttonMDDPWP11,
  author     = {Richard S. Sutton and Joseph Modayil and Michael Delp and Thomas Degris and Patrick M. Pilarski and Adam White and Doina Precup},
  editor     = {Liz Sonenberg and Peter Stone and Kagan Tumer and Pinar Yolum},
  title      = {Horde: a scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction},
  booktitle  = {10th International Conference on Autonomous Agents and Multiagent Systems {(AAMAS} 2011), Taipei, Taiwan, May 2-6, 2011, Volume 1-3},
  pages      = {761--768},
  publisher  = {{IFAAMAS}},
  year       = {2011},
  url        = {https://dl.acm.org/citation.cfm?id=2031726},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/SuttonMDDPWP11.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ilp/Sutton12,
  author     = {Richard S. Sutton},
  editor     = {Stephen H. Muggleton and Alireza Tamaddoni{-}Nezhad and Francesca A. Lisi},
  title      = {Beyond Reward: The Problem of Knowledge and Data},
  booktitle  = {Inductive Logic Programming - 21st International Conference, {ILP} 2011, Windsor Great Park, UK, July 31 - August 3, 2011, Revised Selected Papers},
  series     = {Lecture Notes in Computer Science},
  volume     = {7207},
  pages      = {2--6},
  publisher  = {Springer},
  year       = {2011},
  url        = {https://doi.org/10.1007/978-3-642-31951-8\_2},
  doi        = {10.1007/978-3-642-31951-8\_2},
  timestamp  = {Sat, 30 Apr 2022 12:43:31 +0200},
  biburl     = {https://dblp.org/rec/conf/ilp/Sutton12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1112-1133,
  author     = {Joseph Modayil and Adam White and Richard S. Sutton},
  title      = {Multi-timescale Nexting in a Reinforcement Learning Robot},
  journal    = {CoRR},
  volume     = {abs/1112.1133},
  year       = {2011},
  url        = {http://arxiv.org/abs/1112.1133},
  eprinttype = {arXiv},
  eprint     = {1112.1133},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1112-1133.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/MaeiSBS10,
  author     = {Hamid Reza Maei and Csaba Szepesv{\'{a}}ri and Shalabh Bhatnagar and Richard S. Sutton},
  editor     = {Johannes F{\"{u}}rnkranz and Thorsten Joachims},
  title      = {Toward Off-Policy Learning Control with Function Approximation},
  booktitle  = {Proceedings of the 27th International Conference on Machine Learning (ICML-10), June 21-24, 2010, Haifa, Israel},
  pages      = {719--726},
  publisher  = {Omnipress},
  year       = {2010},
  url        = {https://icml.cc/Conferences/2010/papers/627.pdf},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/MaeiSBS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The journal is given by its full name, Automatica, instead of the ambiguous short form "Autom.".
@article{DBLP:journals/automatica/BhatnagarSGL09,
  author     = {Shalabh Bhatnagar and Richard S. Sutton and Mohammad Ghavamzadeh and Mark Lee},
  title      = {Natural actor-critic algorithms},
  journal    = {Automatica},
  volume     = {45},
  number     = {11},
  pages      = {2471--2482},
  year       = {2009},
  url        = {https://doi.org/10.1016/j.automatica.2009.07.008},
  doi        = {10.1016/J.AUTOMATICA.2009.07.008},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/automatica/BhatnagarSGL09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonMPBSSW09,
  author     = {Richard S. Sutton and Hamid Reza Maei and Doina Precup and Shalabh Bhatnagar and David Silver and Csaba Szepesv{\'{a}}ri and Eric Wiewiora},
  editor     = {Andrea Pohoreckyj Danyluk and L{\'{e}}on Bottou and Michael L. Littman},
  title      = {Fast gradient-descent methods for temporal-difference learning with linear function approximation},
  booktitle  = {Proceedings of the 26th Annual International Conference on Machine Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {382},
  pages      = {993--1000},
  publisher  = {{ACM}},
  year       = {2009},
  url        = {https://doi.org/10.1145/1553374.1553501},
  doi        = {10.1145/1553374.1553501},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SuttonMPBSSW09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/MaeiSBPSS09,
  author     = {Hamid Reza Maei and Csaba Szepesv{\'{a}}ri and Shalabh Bhatnagar and Doina Precup and David Silver and Richard S. Sutton},
  editor     = {Yoshua Bengio and Dale Schuurmans and John D. Lafferty and Christopher K. I. Williams and Aron Culotta},
  title      = {Convergent Temporal-Difference Learning with Arbitrary Smooth Function Approximation},
  booktitle  = {Advances in Neural Information Processing Systems 22: 23rd Annual Conference on Neural Information Processing Systems 2009. Proceedings of a meeting held 7-10 December 2009, Vancouver, British Columbia, Canada},
  pages      = {1204--1212},
  publisher  = {Curran Associates, Inc.},
  year       = {2009},
  url        = {https://proceedings.neurips.cc/paper/2009/hash/3a15c7d0bbe60300a39f76f8a5ba6896-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/MaeiSBPSS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Dyna" in the title is braced so that sentence-casing styles keep the architecture's name capitalised.
@inproceedings{DBLP:conf/nips/YaoSBDS09,
  author     = {Hengshuai Yao and Richard S. Sutton and Shalabh Bhatnagar and Diao Dongcui and Csaba Szepesv{\'{a}}ri},
  editor     = {Yoshua Bengio and Dale Schuurmans and John D. Lafferty and Christopher K. I. Williams and Aron Culotta},
  title      = {Multi-Step {Dyna} Planning for Policy Evaluation and Control},
  booktitle  = {Advances in Neural Information Processing Systems 22: 23rd Annual Conference on Neural Information Processing Systems 2009. Proceedings of a meeting held 7-10 December 2009, Vancouver, British Columbia, Canada},
  pages      = {2187--2195},
  publisher  = {Curran Associates, Inc.},
  year       = {2009},
  url        = {https://proceedings.neurips.cc/paper/2009/hash/c52f1bd66cc19d05628bd8bf27af3ad6-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/YaoSBDS09.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/neco/LudvigSK08,
  author     = {Elliot A. Ludvig and Richard S. Sutton and E. James Kehoe},
  title      = {Stimulus Representation and the Timing of Reward-Prediction Errors in Models of the Dopamine System},
  journal    = {Neural Comput.},
  volume     = {20},
  number     = {12},
  pages      = {3034--3054},
  year       = {2008},
  url        = {https://doi.org/10.1162/neco.2008.11-07-654},
  doi        = {10.1162/NECO.2008.11-07-654},
  timestamp  = {Tue, 01 Sep 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/neco/LudvigSK08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aiide/CutumisuSBS08,
  author     = {Maria Cutumisu and Duane Szafron and Michael H. Bowling and Richard S. Sutton},
  editor     = {Christian Darken and Michael Mateas},
  title      = {Agent Learning using Action-Dependent Learning Rates in Computer Role-Playing Games},
  booktitle  = {Proceedings of the Fourth Artificial Intelligence and Interactive Digital Entertainment Conference, October 22-24, 2008, Stanford, California, {USA}},
  publisher  = {The {AAAI} Press},
  year       = {2008},
  url        = {http://www.aaai.org/Library/AIIDE/2008/aiide08-004.php},
  timestamp  = {Wed, 10 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/aiide/CutumisuSBS08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SilverSM08,
  author     = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  editor     = {William W. Cohen and Andrew McCallum and Sam T. Roweis},
  title      = {Sample-based learning and search with permanent and transient memories},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Fifth International Conference {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {307},
  pages      = {968--975},
  publisher  = {{ACM}},
  year       = {2008},
  url        = {https://doi.org/10.1145/1390156.1390278},
  doi        = {10.1145/1390156.1390278},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SilverSM08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LudvigSVK08,
  author     = {Elliot A. Ludvig and Richard S. Sutton and Eric Verbeek and E. James Kehoe},
  editor     = {Daphne Koller and Dale Schuurmans and Yoshua Bengio and L{\'{e}}on Bottou},
  title      = {A computational model of hippocampal function in trace conditioning},
  booktitle  = {Advances in Neural Information Processing Systems 21, Proceedings of the Twenty-Second Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages      = {993--1000},
  publisher  = {Curran Associates, Inc.},
  year       = {2008},
  url        = {https://proceedings.neurips.cc/paper/2008/hash/d709f38ef758b5066ef31b18039b8ce5-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/LudvigSVK08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The complexity bound in the title is written as braced math so that sentence-casing styles
% cannot turn big-O into little-o.
@inproceedings{DBLP:conf/nips/SuttonSM08,
  author     = {Richard S. Sutton and Csaba Szepesv{\'{a}}ri and Hamid Reza Maei},
  editor     = {Daphne Koller and Dale Schuurmans and Yoshua Bengio and L{\'{e}}on Bottou},
  title      = {A Convergent {$O(n)$} Temporal-difference Algorithm for Off-policy Learning with Linear Function Approximation},
  booktitle  = {Advances in Neural Information Processing Systems 21, Proceedings of the Twenty-Second Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages      = {1609--1616},
  publisher  = {Curran Associates, Inc.},
  year       = {2008},
  url        = {https://proceedings.neurips.cc/paper/2008/hash/e0c641195b27425bb056ac56f8953d24-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/SuttonSM08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The url query separators were mangled HTML entities for the ampersand; they are restored to
% plain escaped ampersands. "Dyna" in the title is braced to survive sentence-casing styles.
@inproceedings{DBLP:conf/uai/SuttonSGB08,
  author     = {Richard S. Sutton and Csaba Szepesv{\'{a}}ri and Alborz Geramifard and Michael H. Bowling},
  editor     = {David A. McAllester and Petri Myllym{\"{a}}ki},
  title      = {{Dyna}-Style Planning with Linear Function Approximation and Prioritized Sweeping},
  booktitle  = {{UAI} 2008, Proceedings of the 24th Conference in Uncertainty in Artificial Intelligence, Helsinki, Finland, July 9-12, 2008},
  pages      = {528--536},
  publisher  = {{AUAI} Press},
  year       = {2008},
  url        = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1971\&proceeding\_id=24},
  timestamp  = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/uai/SuttonSGB08.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonKS07,
  author     = {Richard S. Sutton and Anna Koop and David Silver},
  editor     = {Zoubin Ghahramani},
  title      = {On the role of tracking in stationary environments},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Fourth International Conference {(ICML} 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {227},
  pages      = {871--878},
  publisher  = {{ACM}},
  year       = {2007},
  url        = {https://doi.org/10.1145/1273496.1273606},
  doi        = {10.1145/1273496.1273606},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/SuttonKS07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "Go" in the title is braced so that sentence-casing styles keep the game's name capitalised.
@inproceedings{DBLP:conf/ijcai/SilverSM07,
  author     = {David Silver and Richard S. Sutton and Martin M{\"{u}}ller},
  editor     = {Manuela M. Veloso},
  title      = {Reinforcement Learning of Local Shape in the Game of {Go}},
  booktitle  = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages      = {1053--1058},
  year       = {2007},
  url        = {http://ijcai.org/Proceedings/07/Papers/170.pdf},
  timestamp  = {Tue, 20 Aug 2019 16:17:11 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/SilverSM07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BhatnagarSGL07,
  author     = {Shalabh Bhatnagar and Richard S. Sutton and Mohammad Ghavamzadeh and Mark Lee},
  editor     = {John C. Platt and Daphne Koller and Yoram Singer and Sam T. Roweis},
  title      = {Incremental Natural Actor-Critic Algorithms},
  booktitle  = {Advances in Neural Information Processing Systems 20, Proceedings of the Twenty-First Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 3-6, 2007},
  pages      = {105--112},
  publisher  = {Curran Associates, Inc.},
  year       = {2007},
  url        = {https://proceedings.neurips.cc/paper/2007/hash/6883966fd8f918a4aa29be29d2c386fb-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/BhatnagarSGL07.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/GeramifardBS06,
  author     = {Alborz Geramifard and Michael H. Bowling and Richard S. Sutton},
  title      = {Incremental Least-Squares Temporal Difference Learning},
  booktitle  = {Proceedings, The Twenty-First National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference, July 16-20, 2006, Boston, Massachusetts, {USA}},
  pages      = {356--361},
  publisher  = {{AAAI} Press},
  year       = {2006},
  url        = {http://www.aaai.org/Library/AAAI/2006/aaai06-057.php},
  timestamp  = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/GeramifardBS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The algorithm name in the title is braced so that sentence-casing styles preserve its mixed case.
@inproceedings{DBLP:conf/nips/GeramifardBZS06,
  author     = {Alborz Geramifard and Michael H. Bowling and Martin Zinkevich and Richard S. Sutton},
  editor     = {Bernhard Sch{\"{o}}lkopf and John C. Platt and Thomas Hofmann},
  title      = {{iLSTD}: Eligibility Traces and Convergence Analysis},
  booktitle  = {Advances in Neural Information Processing Systems 19, Proceedings of the Twentieth Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 4-7, 2006},
  pages      = {441--448},
  publisher  = {{MIT} Press},
  year       = {2006},
  url        = {https://proceedings.neurips.cc/paper/2006/hash/6ad4174eba19ecb5fed17411a34ff5e6-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/GeramifardBZS06.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "RoboCup" in the title is braced so that sentence-casing styles preserve its capitalisation.
@article{DBLP:journals/adb/StoneSK05,
  author     = {Peter Stone and Richard S. Sutton and Gregory Kuhlmann},
  title      = {Reinforcement Learning for {RoboCup} Soccer Keepaway},
  journal    = {Adapt. Behav.},
  volume     = {13},
  number     = {3},
  pages      = {165--188},
  year       = {2005},
  url        = {https://doi.org/10.1177/105971230501300301},
  doi        = {10.1177/105971230501300301},
  timestamp  = {Tue, 25 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/adb/StoneSK05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The title writes the method name with a math-mode lambda inside braces so it is typeset as a
% Greek letter and not recased. The editor with the two-word surname is given in
% "Last, First" form so that both words are parsed as the family name.
@inproceedings{DBLP:conf/icml/TannerS05,
  author     = {Brian Tanner and Richard S. Sutton},
  editor     = {De Raedt, Luc and Stefan Wrobel},
  title      = {{TD($\lambda$)} networks: temporal-difference networks with eligibility traces},
  booktitle  = {Machine Learning, Proceedings of the Twenty-Second International Conference {(ICML} 2005), Bonn, Germany, August 7-11, 2005},
  series     = {{ACM} International Conference Proceeding Series},
  volume     = {119},
  pages      = {888--895},
  publisher  = {{ACM}},
  year       = {2005},
  url        = {https://doi.org/10.1145/1102351.1102463},
  doi        = {10.1145/1102351.1102463},
  timestamp  = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/TannerS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/RafolsRST05,
  author     = {Eddie J. Rafols and Mark B. Ring and Richard S. Sutton and Brian Tanner},
  editor     = {Leslie Pack Kaelbling and Alessandro Saffiotti},
  title      = {Using Predictive Representations to Improve Generalization in Reinforcement Learning},
  booktitle  = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages      = {835--840},
  publisher  = {Professional Book Center},
  year       = {2005},
  url        = {http://ijcai.org/Proceedings/05/Papers/1650.pdf},
  timestamp  = {Tue, 20 Aug 2019 16:16:29 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/RafolsRST05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/TannerS05,
  author     = {Brian Tanner and Richard S. Sutton},
  editor     = {Leslie Pack Kaelbling and Alessandro Saffiotti},
  title      = {Temporal-Difference Networks with History},
  booktitle  = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages      = {865--870},
  publisher  = {Professional Book Center},
  year       = {2005},
  url        = {http://ijcai.org/Proceedings/05/Papers/1640.pdf},
  timestamp  = {Tue, 20 Aug 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/TannerS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/PrecupSPKS05,
  author     = {Doina Precup and Richard S. Sutton and Cosmin Paduraru and Anna Koop and Satinder Singh},
  title      = {Off-policy Learning with Options and Recognizers},
  booktitle  = {Advances in Neural Information Processing Systems 18 [Neural Information Processing Systems, {NIPS} 2005, December 5-8, 2005, Vancouver, British Columbia, Canada]},
  pages      = {1097--1104},
  year       = {2005},
  url        = {https://proceedings.neurips.cc/paper/2005/hash/f75526659f31040afeb61cb7133e4e6d-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/PrecupSPKS05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonRK05,
  author     = {Richard S. Sutton and Eddie J. Rafols and Anna Koop},
  title      = {Temporal Abstraction in Temporal-difference Networks},
  booktitle  = {Advances in Neural Information Processing Systems 18 [Neural Information Processing Systems, {NIPS} 2005, December 5-8, 2005, Vancouver, British Columbia, Canada]},
  pages      = {1313--1320},
  year       = {2005},
  url        = {https://proceedings.neurips.cc/paper/2005/hash/12311d05c9aa67765703984239511212-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/SuttonRK05.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonT04,
  author     = {Richard S. Sutton and Brian Tanner},
  title      = {Temporal-Difference Networks},
  booktitle  = {Advances in Neural Information Processing Systems 17 [Neural Information Processing Systems, {NIPS} 2004, December 13-18, 2004, Vancouver, British Columbia, Canada]},
  pages      = {1377--1384},
  year       = {2004},
  url        = {https://proceedings.neurips.cc/paper/2004/hash/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/SuttonT04.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@proceedings{DBLP:conf/aaai/2002,
  editor     = {Rina Dechter and Michael J. Kearns and Richard S. Sutton},
  title      = {Proceedings of the Eighteenth National Conference on Artificial Intelligence and Fourteenth Conference on Innovative Applications of Artificial Intelligence, July 28 - August 1, 2002, Edmonton, Alberta, Canada},
  publisher  = {{AAAI} Press / The {MIT} Press},
  year       = {2002},
  url        = {http://www.aaai.org/Conferences/AAAI/aaai02.php},
  timestamp  = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/2002.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PrecupSD01,
  author     = {Doina Precup and Richard S. Sutton and Sanjoy Dasgupta},
  editor     = {Carla E. Brodley and Andrea Pohoreckyj Danyluk},
  title      = {Off-Policy Temporal Difference Learning with Function Approximation},
  booktitle  = {Proceedings of the Eighteenth International Conference on Machine Learning {(ICML} 2001), Williams College, Williamstown, MA, USA, June 28 - July 1, 2001},
  pages      = {417--424},
  publisher  = {Morgan Kaufmann},
  year       = {2001},
  timestamp  = {Wed, 27 Nov 2002 10:53:35 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/PrecupSD01.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "RoboCup" in the title is braced so that sentence-casing styles preserve its capitalisation.
@inproceedings{DBLP:conf/icml/StoneS01,
  author     = {Peter Stone and Richard S. Sutton},
  editor     = {Carla E. Brodley and Andrea Pohoreckyj Danyluk},
  title      = {Scaling Reinforcement Learning toward {RoboCup} Soccer},
  booktitle  = {Proceedings of the Eighteenth International Conference on Machine Learning {(ICML} 2001), Williams College, Williamstown, MA, USA, June 28 - July 1, 2001},
  pages      = {537--544},
  publisher  = {Morgan Kaufmann},
  year       = {2001},
  timestamp  = {Wed, 27 Nov 2002 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/StoneS01.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LittmanSS01,
  author     = {Michael L. Littman and Richard S. Sutton and Satinder Singh},
  editor     = {Thomas G. Dietterich and Suzanna Becker and Zoubin Ghahramani},
  title      = {Predictive Representations of State},
  booktitle  = {Advances in Neural Information Processing Systems 14 [Neural Information Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8, 2001, Vancouver, British Columbia, Canada]},
  pages      = {1555--1561},
  publisher  = {{MIT} Press},
  year       = {2001},
  url        = {https://proceedings.neurips.cc/paper/2001/hash/1e4d36177d71bbb3558e43af9577d70e-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/LittmanSS01.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/robocup/StoneS01,
  author     = {Peter Stone and Richard S. Sutton},
  editor     = {Andreas Birk and Silvia Coradeschi and Satoshi Tadokoro},
  title      = {Keepaway Soccer: {A} Machine Learning Testbed},
  booktitle  = {RoboCup 2001: Robot Soccer World Cup {V}},
  series     = {Lecture Notes in Computer Science},
  volume     = {2377},
  pages      = {214--223},
  publisher  = {Springer},
  year       = {2001},
  url        = {https://doi.org/10.1007/3-540-45603-1\_22},
  doi        = {10.1007/3-540-45603-1\_22},
  timestamp  = {Tue, 14 May 2019 10:00:53 +0200},
  biburl     = {https://dblp.org/rec/conf/robocup/StoneS01.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PrecupSS00,
  author     = {Doina Precup and Richard S. Sutton and Satinder Singh},
  editor     = {Pat Langley},
  title      = {Eligibility Traces for Off-Policy Policy Evaluation},
  booktitle  = {Proceedings of the Seventeenth International Conference on Machine Learning {(ICML} 2000), Stanford University, Stanford, CA, USA, June 29 - July 2, 2000},
  pages      = {759--766},
  publisher  = {Morgan Kaufmann},
  year       = {2000},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/PrecupSS00.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% RoboCup 2000 paper (LNCS 2019) by Stone, Sutton and Singh.
@inproceedings{DBLP:conf/robocup/StoneSS00,
  author    = {Peter Stone and
               Richard S. Sutton and
               Satinder Singh},
  editor    = {Peter Stone and
               Tucker R. Balch and
               Gerhard K. Kraetzschmar},
  title     = {Reinforcement Learning for 3 vs. 2 Keepaway},
  booktitle = {RoboCup 2000: Robot Soccer World Cup {IV}},
  series    = {Lecture Notes in Computer Science},
  volume    = {2019},
  pages     = {249--258},
  publisher = {Springer},
  year      = {2000},
  url       = {https://doi.org/10.1007/3-540-45324-5\_23},
  doi       = {10.1007/3-540-45324-5\_23},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/robocup/StoneSS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Artif. Intell. 112 (1999) article by Sutton, Precup and Singh.
% The acronyms in the title are braced so that sentence-casing styles
% cannot lowercase them to "mdps" / "semi-mdps".
@article{DBLP:journals/ai/SuttonPS99,
  author    = {Richard S. Sutton and
               Doina Precup and
               Satinder Singh},
  title     = {Between {MDPs} and {Semi-MDPs}: {A} Framework for Temporal Abstraction in Reinforcement Learning},
  journal   = {Artif. Intell.},
  volume    = {112},
  number    = {1-2},
  pages     = {181--211},
  year      = {1999},
  url       = {https://doi.org/10.1016/S0004-3702(99)00052-1},
  doi       = {10.1016/S0004-3702(99)00052-1},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ai/SuttonPS99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% EuroCOLT '99 paper (LNCS 1572) by Sutton.
@inproceedings{DBLP:conf/eurocolt/Sutton99,
  author    = {Richard S. Sutton},
  editor    = {Paul Fischer and
               Hans Ulrich Simon},
  title     = {Open Theoretical Questions in Reinforcement Learning},
  booktitle = {Computational Learning Theory, 4th European Conference, EuroCOLT '99, Nordkirchen, Germany, March 29-31, 1999, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1572},
  pages     = {11--17},
  publisher = {Springer},
  year      = {1999},
  url       = {https://doi.org/10.1007/3-540-49097-3\_2},
  doi       = {10.1007/3-540-49097-3\_2},
  timestamp = {Fri, 08 Sep 2023 21:18:19 +0200},
  biburl    = {https://dblp.org/rec/conf/eurocolt/Sutton99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 12 (1999) paper by Sutton, McAllester, Singh and Mansour.
@inproceedings{DBLP:conf/nips/SuttonMSM99,
  author    = {Richard S. Sutton and
               David A. McAllester and
               Satinder Singh and
               Yishay Mansour},
  editor    = {Sara A. Solla and
               Todd K. Leen and
               Klaus{-}Robert M{\"{u}}ller},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference, Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages     = {1057--1063},
  publisher = {The {MIT} Press},
  year      = {1999},
  url       = {http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonMSM99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Book by Sutton and Barto (MIT Press, 1998).
% The title is stored in Title Case with a colon before the subtitle:
% a style can downcase a title but cannot restore lost capitals.
@book{DBLP:books/lib/SuttonB98,
  author    = {Richard S. Sutton and
               Andrew G. Barto},
  title     = {Reinforcement Learning: An Introduction},
  series    = {Adaptive computation and machine learning},
  publisher = {{MIT} Press},
  year      = {1998},
  url       = {https://www.worldcat.org/oclc/37293240},
  isbn      = {978-0-262-19398-6},
  timestamp = {Fri, 17 Jul 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/books/lib/SuttonB98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% One-page item in IEEE Trans. Neural Networks 9(5), 1998.
% pages holds the single page number instead of a degenerate
% "1054--1054" range, which styles would print as a range.
@article{DBLP:journals/tnn/SuttonB98,
  author    = {Richard S. Sutton and
               Andrew G. Barto},
  title     = {Reinforcement Learning: An Introduction},
  journal   = {{IEEE} Trans. Neural Networks},
  volume    = {9},
  number    = {5},
  pages     = {1054},
  year      = {1998},
  url       = {https://doi.org/10.1109/TNN.1998.712192},
  doi       = {10.1109/TNN.1998.712192},
  timestamp = {Sun, 28 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/tnn/SuttonB98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ECML-98 paper (LNCS 1398) by Precup, Sutton and Singh.
@inproceedings{DBLP:conf/ecml/PrecupSS98,
  author    = {Doina Precup and
               Richard S. Sutton and
               Satinder Singh},
  editor    = {Claire Nedellec and
               C{\'{e}}line Rouveirol},
  title     = {Theoretical Results on Reinforcement Learning with Temporally Abstract Options},
  booktitle = {Machine Learning: ECML-98, 10th European Conference on Machine Learning, Chemnitz, Germany, April 21-23, 1998, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1398},
  pages     = {382--393},
  publisher = {Springer},
  year      = {1998},
  url       = {https://doi.org/10.1007/BFb0026709},
  doi       = {10.1007/BFB0026709},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ecml/PrecupSS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 1998 paper by Sutton, Precup and Singh (no url/doi recorded).
@inproceedings{DBLP:conf/icml/SuttonPS98,
  author    = {Richard S. Sutton and
               Doina Precup and
               Satinder Singh},
  editor    = {Jude W. Shavlik},
  title     = {Intra-Option Learning about Temporally Abstract Actions},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages     = {556--564},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonPS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 11 (1998) paper by Moll, Barto, Perkins and Sutton.
@inproceedings{DBLP:conf/nips/MollBPS98,
  author    = {Robert Moll and
               Andrew G. Barto and
               Theodore J. Perkins and
               Richard S. Sutton},
  editor    = {Michael J. Kearns and
               Sara A. Solla and
               David A. Cohn},
  title     = {Learning Instance-Independent Value Functions to Enhance Local Search},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages     = {1017--1023},
  publisher = {The {MIT} Press},
  year      = {1998},
  url       = {http://papers.nips.cc/paper/1573-learning-instance-independent-value-functions-to-enhance-local-search},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/MollBPS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 11 (1998) paper by Sutton, Singh, Precup and Ravindran.
@inproceedings{DBLP:conf/nips/SuttonSPR98,
  author    = {Richard S. Sutton and
               Satinder Singh and
               Doina Precup and
               Balaraman Ravindran},
  editor    = {Michael J. Kearns and
               Sara A. Solla and
               David A. Cohn},
  title     = {Improved Switching among Temporally Abstract Actions},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages     = {1066--1072},
  publisher = {The {MIT} Press},
  year      = {1998},
  url       = {http://papers.nips.cc/paper/1607-improved-switching-among-temporally-abstract-actions},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonSPR98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% SEAL'98 selected paper (LNCS 1585) by Sutton.
@inproceedings{DBLP:conf/seal/Sutton98,
  author    = {Richard S. Sutton},
  editor    = {Bob McKay and
               Xin Yao and
               Charles S. Newton and
               Jong{-}Hwan Kim and
               Takeshi Furuhashi},
  title     = {Reinforcement Learning: Past, Present and Future},
  booktitle = {Simulated Evolution and Learning, Second Asia-Pacific Conference on Simulated Evolution and Learning, SEAL'98, Canberra, Australia, November 24-27 1998, Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {1585},
  pages     = {195--197},
  publisher = {Springer},
  year      = {1998},
  url       = {https://doi.org/10.1007/3-540-48873-1\_26},
  doi       = {10.1007/3-540-48873-1\_26},
  timestamp = {Tue, 14 May 2019 10:00:41 +0200},
  biburl    = {https://dblp.org/rec/conf/seal/Sutton98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Adapt. Behav. 6(2), 1997 article by Santamaria, Sutton and Ram.
@article{DBLP:journals/adb/SantamariaSR97,
  author    = {Juan Carlos Santamar{\'{\i}}a and
               Richard S. Sutton and
               Ashwin Ram},
  title     = {Experiments with Reinforcement Learning in Problems with Continuous State and Action Spaces},
  journal   = {Adapt. Behav.},
  volume    = {6},
  number    = {2},
  pages     = {163--217},
  year      = {1997},
  url       = {https://doi.org/10.1177/105971239700600201},
  doi       = {10.1177/105971239700600201},
  timestamp = {Thu, 07 Dec 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/adb/SantamariaSR97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICANN '97 paper (LNCS 1327) by Sutton.
% "Markov" is a proper noun, so it is braced to survive styles that
% convert titles to sentence case.
@inproceedings{DBLP:conf/icann/Sutton97,
  author    = {Richard S. Sutton},
  editor    = {Wulfram Gerstner and
               Alain Germond and
               Martin Hasler and
               Jean{-}Daniel Nicoud},
  title     = {On the Significance of {Markov} Decision Processes},
  booktitle = {Artificial Neural Networks - {ICANN} '97, 7th International Conference, Lausanne, Switzerland, October 8-10, 1997, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1327},
  pages     = {273--282},
  publisher = {Springer},
  year      = {1997},
  url       = {https://doi.org/10.1007/BFb0020167},
  doi       = {10.1007/BFB0020167},
  timestamp = {Tue, 14 May 2019 10:00:49 +0200},
  biburl    = {https://dblp.org/rec/conf/icann/Sutton97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 1997 paper by Precup and Sutton (no url/doi recorded).
@inproceedings{DBLP:conf/icml/PrecupS97,
  author    = {Doina Precup and
               Richard S. Sutton},
  editor    = {Douglas H. Fisher},
  title     = {Exponentiated Gradient Methods for Reinforcement Learning},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine Learning {(ICML} 1997), Nashville, Tennessee, USA, July 8-12, 1997},
  pages     = {272--277},
  publisher = {Morgan Kaufmann},
  year      = {1997},
  timestamp = {Wed, 04 Dec 2002 12:34:29 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/PrecupS97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 10 (1997) paper by Precup and Sutton.
@inproceedings{DBLP:conf/nips/PrecupS97,
  author    = {Doina Precup and
               Richard S. Sutton},
  editor    = {Michael I. Jordan and
               Michael J. Kearns and
               Sara A. Solla},
  title     = {Multi-time Models for Temporally Abstract Planning},
  booktitle = {Advances in Neural Information Processing Systems 10, {[NIPS} Conference, Denver, Colorado, USA, 1997]},
  pages     = {1050--1056},
  publisher = {The {MIT} Press},
  year      = {1997},
  url       = {http://papers.nips.cc/paper/1362-multi-time-models-for-temporally-abstract-planning},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/PrecupS97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Mach. Learn. 22(1-3), 1996 article by Singh and Sutton.
@article{DBLP:journals/ml/SinghS96,
  author    = {Satinder P. Singh and
               Richard S. Sutton},
  title     = {Reinforcement Learning with Replacing Eligibility Traces},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {123--158},
  year      = {1996},
  url       = {https://doi.org/10.1023/A:1018012322525},
  doi       = {10.1023/A:1018012322525},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/SinghS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 1995 (Tahoe City) paper by Sutton.
@inproceedings{DBLP:conf/icml/Sutton95,
  author    = {Richard S. Sutton},
  editor    = {Armand Prieditis and
               Stuart Russell},
  title     = {{TD} Models: Modeling the World at a Mixture of Time Scales},
  booktitle = {Machine Learning, Proceedings of the Twelfth International Conference on Machine Learning, Tahoe City, California, USA, July 9-12, 1995},
  pages     = {531--539},
  publisher = {Morgan Kaufmann},
  year      = {1995},
  url       = {https://doi.org/10.1016/b978-1-55860-377-6.50072-4},
  doi       = {10.1016/B978-1-55860-377-6.50072-4},
  timestamp = {Wed, 20 Apr 2022 13:29:53 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/Sutton95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 8 (1995) paper by Sutton.
% The acronym in booktitle is braced, as in the other NIPS proceedings
% entries of this file, so that no style can recase it.
@inproceedings{DBLP:conf/nips/Sutton95,
  author    = {Richard S. Sutton},
  editor    = {David S. Touretzky and
               Michael Mozer and
               Michael E. Hasselmo},
  title     = {Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding},
  booktitle = {Advances in Neural Information Processing Systems 8, {NIPS}, Denver, CO, USA, November 27-30, 1995},
  pages     = {1038--1044},
  publisher = {{MIT} Press},
  year      = {1995},
  url       = {http://papers.nips.cc/paper/1109-generalization-in-reinforcement-learning-successful-examples-using-sparse-coarse-coding},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/Sutton95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 1993 (Amherst) paper by Sutton and Whitehead.
@inproceedings{DBLP:conf/icml/SuttonW93,
  author    = {Richard S. Sutton and
               Steven D. Whitehead},
  editor    = {Paul E. Utgoff},
  title     = {Online Learning with Random Representations},
  booktitle = {Machine Learning, Proceedings of the Tenth International Conference, University of Massachusetts, Amherst, MA, USA, June 27-29, 1993},
  pages     = {314--321},
  publisher = {Morgan Kaufmann},
  year      = {1993},
  url       = {https://doi.org/10.1016/b978-1-55860-307-3.50047-2},
  doi       = {10.1016/B978-1-55860-307-3.50047-2},
  timestamp = {Mon, 24 Jun 2019 13:59:03 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonW93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 1992 paper by Sutton.
% The algorithm name at the end of the title is braced so that
% sentence-casing styles do not turn it into "delta-bar-delta".
@inproceedings{DBLP:conf/aaai/Sutton92,
  author    = {Richard S. Sutton},
  editor    = {William R. Swartout},
  title     = {Adapting Bias by Gradient Descent: An Incremental Version of {Delta-Bar-Delta}},
  booktitle = {Proceedings of the 10th National Conference on Artificial Intelligence, San Jose, CA, USA, July 12-16, 1992},
  pages     = {171--176},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {1992},
  url       = {http://www.aaai.org/Library/AAAI/1992/aaai92-027.php},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/Sutton92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% SIGART Bull. 2(4), 1991 article by Sutton.
@article{DBLP:journals/sigart/Sutton91,
  author    = {Richard S. Sutton},
  title     = {Dyna, an Integrated Architecture for Learning, Planning, and Reacting},
  journal   = {{SIGART} Bull.},
  volume    = {2},
  number    = {4},
  pages     = {160--163},
  year      = {1991},
  url       = {https://doi.org/10.1145/122344.122377},
  doi       = {10.1145/122344.122377},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/sigart/Sutton91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ML91 workshop paper by Sutton and Matheus.
@inproceedings{DBLP:conf/icml/SuttonM91,
  author    = {Richard S. Sutton and
               Christopher J. Matheus},
  editor    = {Lawrence Birnbaum and
               Gregg Collins},
  title     = {Learning Polynomial Functions by Feature Construction},
  booktitle = {Proceedings of the Eighth International Workshop (ML91), Northwestern University, Evanston, Illinois, {USA}},
  pages     = {208--212},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {https://doi.org/10.1016/b978-1-55860-200-7.50045-3},
  doi       = {10.1016/B978-1-55860-200-7.50045-3},
  timestamp = {Wed, 19 Jun 2019 17:09:09 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonM91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ML91 workshop paper by Sutton.
@inproceedings{DBLP:conf/icml/Sutton91,
  author    = {Richard S. Sutton},
  editor    = {Lawrence Birnbaum and
               Gregg Collins},
  title     = {Planning by Incremental Dynamic Programming},
  booktitle = {Proceedings of the Eighth International Workshop (ML91), Northwestern University, Evanston, Illinois, {USA}},
  pages     = {353--357},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {https://doi.org/10.1016/b978-1-55860-200-7.50073-8},
  doi       = {10.1016/B978-1-55860-200-7.50073-8},
  timestamp = {Wed, 19 Jun 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/Sutton91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 4 (1991) paper by Sanger, Sutton and Matheus.
@inproceedings{DBLP:conf/nips/SangerSM91,
  author    = {Terence D. Sanger and
               Richard S. Sutton and
               Christopher J. Matheus},
  editor    = {John E. Moody and
               Stephen Jose Hanson and
               Richard Lippmann},
  title     = {Iterative Construction of Sparse Polynomial Approximations},
  booktitle = {Advances in Neural Information Processing Systems 4, {[NIPS} Conference, Denver, Colorado, USA, December 2-5, 1991]},
  pages     = {1064--1071},
  publisher = {Morgan Kaufmann},
  year      = {1991},
  url       = {http://papers.nips.cc/paper/538-iterative-construction-of-sparse-polynomial-approximations},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SangerSM91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 1990 (Austin) paper by Sutton.
@inproceedings{DBLP:conf/icml/Sutton90,
  author    = {Richard S. Sutton},
  editor    = {Bruce W. Porter and
               Raymond J. Mooney},
  title     = {Integrated Architectures for Learning, Planning, and Reacting Based on Approximating Dynamic Programming},
  booktitle = {Machine Learning, Proceedings of the Seventh International Conference on Machine Learning, Austin, Texas, USA, June 21-23, 1990},
  pages     = {216--224},
  publisher = {Morgan Kaufmann},
  year      = {1990},
  url       = {https://doi.org/10.1016/b978-1-55860-141-3.50030-4},
  doi       = {10.1016/B978-1-55860-141-3.50030-4},
  timestamp = {Thu, 25 Jul 2019 17:43:05 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/Sutton90.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 3 (1990) paper by Sutton.
@inproceedings{DBLP:conf/nips/Sutton90,
  author    = {Richard S. Sutton},
  editor    = {Richard Lippmann and
               John E. Moody and
               David S. Touretzky},
  title     = {Integrated Modeling and Control Based on Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 3, {[NIPS} Conference, Denver, Colorado, USA, November 26-29, 1990]},
  pages     = {471--478},
  publisher = {Morgan Kaufmann},
  year      = {1990},
  url       = {http://papers.nips.cc/paper/388-integrated-modeling-and-control-based-on-reinforcement-learning},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/Sutton90.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2 (1989) paper by Barto, Sutton and Watkins.
% Title spelling corrected to "Problems", matching the spelling in this
% entry's own url.
@inproceedings{DBLP:conf/nips/BartoSW89,
  author    = {Andrew G. Barto and
               Richard S. Sutton and
               Christopher J. C. H. Watkins},
  editor    = {David S. Touretzky},
  title     = {Sequential Decision Problems and Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 2, {[NIPS} Conference, Denver, Colorado, USA, November 27-30, 1989]},
  pages     = {686--693},
  publisher = {Morgan Kaufmann},
  year      = {1989},
  url       = {http://papers.nips.cc/paper/194-sequential-decision-problems-and-neural-networks},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/BartoSW89.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Mach. Learn. 3, 1988 article by Sutton (no issue number recorded).
@article{DBLP:journals/ml/Sutton88,
  author    = {Richard S. Sutton},
  title     = {Learning to Predict by the Methods of Temporal Differences},
  journal   = {Mach. Learn.},
  volume    = {3},
  pages     = {9--44},
  year      = {1988},
  url       = {https://doi.org/10.1007/BF00115009},
  doi       = {10.1007/BF00115009},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/Sutton88.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 1985 paper by Selfridge, Sutton and Barto.
@inproceedings{DBLP:conf/ijcai/SelfridgeSB85,
  author    = {Oliver G. Selfridge and
               Richard S. Sutton and
               Andrew G. Barto},
  editor    = {Aravind K. Joshi},
  title     = {Training and Tracking in Robotics},
  booktitle = {Proceedings of the 9th International Joint Conference on Artificial Intelligence. Los Angeles, CA, USA, August 1985},
  pages     = {670--672},
  publisher = {Morgan Kaufmann},
  year      = {1985},
  url       = {http://ijcai.org/Proceedings/85-1/Papers/129a.pdf},
  timestamp = {Tue, 20 Aug 2019 16:19:04 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/SelfridgeSB85.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IEEE Trans. Syst. Man Cybern. 13(5), 1983 article by Barto, Sutton
% and Anderson.
% The title is stored in Title Case; a style can downcase a title but
% cannot restore capitals.
@article{DBLP:journals/tsmc/BartoSA83,
  author    = {Andrew G. Barto and
               Richard S. Sutton and
               Charles W. Anderson},
  title     = {Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problems},
  journal   = {{IEEE} Trans. Syst. Man Cybern.},
  volume    = {13},
  number    = {5},
  pages     = {834--846},
  year      = {1983},
  url       = {https://doi.org/10.1109/TSMC.1983.6313077},
  doi       = {10.1109/TSMC.1983.6313077},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/tsmc/BartoSA83.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.