BibTeX records: Richard S. Sutton

download as .bib file

@inproceedings{DBLP:conf/aaai/SuttonMHSTT024,
  author     = {Richard S. Sutton and Marlos C. Machado and G. Zacharias Holland and
                David Szepesvari and Finbarr Timbers and Brian Tanner and Adam White},
  editor     = {Michael J. Wooldridge and Jennifer G. Dy and Sriraam Natarajan},
  title      = {Reward-Respecting Subtasks for Model-Based Reinforcement Learning
                (Abstract Reprint)},
  booktitle  = {Thirty-Eighth {AAAI} Conference on Artificial Intelligence, {AAAI}
                2024, Thirty-Sixth Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2024, Fourteenth Symposium on Educational Advances
                in Artificial Intelligence, {EAAI} 2024, February 20-27, 2024, Vancouver,
                Canada},
  pages      = {22713},
  publisher  = {{AAAI} Press},
  year       = {2024},
  url        = {https://doi.org/10.1609/aaai.v38i20.30613},
  doi        = {10.1609/AAAI.V38I20.30613},
  timestamp  = {Tue, 02 Apr 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/SuttonMHSTT024.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2401-17401,
  author     = {Thomas Degris and Khurram Javed and Arsalan Sharifnassab and
                Yuxin Liu and Richard S. Sutton},
  title      = {Step-size Optimization for Continual Learning},
  journal    = {CoRR},
  volume     = {abs/2401.17401},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2401.17401},
  doi        = {10.48550/ARXIV.2401.17401},
  eprinttype = {arXiv},
  eprint     = {2401.17401},
  timestamp  = {Wed, 07 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-17401.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2402-02342,
  author     = {Arsalan Sharifnassab and Saber Salehkaleybar and Richard S. Sutton},
  title      = {{MetaOptimize}: {A} Framework for Optimizing Step Sizes and Other
                Meta-parameters},
  journal    = {CoRR},
  volume     = {abs/2402.02342},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.02342},
  doi        = {10.48550/ARXIV.2402.02342},
  eprinttype = {arXiv},
  eprint     = {2402.02342},
  timestamp  = {Fri, 09 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-02342.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/adb/RafieeAGKSLW23,
  author     = {Banafsheh Rafiee and Zaheer Abbas and Sina Ghiassian and
                Raksha Kumaraswamy and Richard S. Sutton and Elliot A. Ludvig and
                Adam White},
  title      = {From eye-blinks to state construction: Diagnostic benchmarks for online
                representation learning},
  journal    = {Adapt. Behav.},
  volume     = {31},
  number     = {1},
  pages      = {3--19},
  year       = {2023},
  url        = {https://doi.org/10.1177/10597123221085039},
  doi        = {10.1177/10597123221085039},
  timestamp  = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/adb/RafieeAGKSLW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SuttonMHSTTW23,
  author     = {Richard S. Sutton and Marlos C. Machado and G. Zacharias Holland and
                David Szepesvari and Finbarr Timbers and Brian Tanner and Adam White},
  title      = {Reward-respecting subtasks for model-based reinforcement learning},
  journal    = {Artif. Intell.},
  volume     = {324},
  pages      = {104001},
  year       = {2023},
  url        = {https://doi.org/10.1016/j.artint.2023.104001},
  doi        = {10.1016/J.ARTINT.2023.104001},
  timestamp  = {Wed, 01 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ai/SuttonMHSTTW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/JavedSSW23,
  author     = {Khurram Javed and Haseeb Shah and Richard S. Sutton and Martha White},
  title      = {Scalable Real-Time Recurrent Learning Using Columnar-Constructive
                Networks},
  journal    = {J. Mach. Learn. Res.},
  volume     = {24},
  pages      = {256:1--256:34},
  year       = {2023},
  url        = {http://jmlr.org/papers/v24/23-0367.html},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/JavedSSW23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nca/MathewsonPSESP23,
  author     = {Kory W. Mathewson and Adam S. R. Parker and Craig Sherstan and
                Ann L. Edwards and Richard S. Sutton and Patrick M. Pilarski},
  title      = {Communicative capital: a key resource for human-machine shared agency
                and collaborative capacity},
  journal    = {Neural Comput. Appl.},
  volume     = {35},
  number     = {23},
  pages      = {16805--16819},
  year       = {2023},
  url        = {https://doi.org/10.1007/s00521-022-07948-1},
  doi        = {10.1007/S00521-022-07948-1},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nca/MathewsonPSESP23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/collas/RafieeG0S0023,
  author     = {Banafsheh Rafiee and Sina Ghiassian and Jun Jin and
                Richard S. Sutton and Jun Luo and Adam White},
  editor     = {Sarath Chandar and Razvan Pascanu and Hanie Sedghi and Doina Precup},
  title      = {Auxiliary task discovery through generate-and-test},
  booktitle  = {Conference on Lifelong Learning Agents, 22-25 August 2023, McGill
                University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {232},
  pages      = {703--714},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v232/rafiee23a.html},
  timestamp  = {Tue, 20 Feb 2024 13:52:18 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/RafieeG0S0023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/collas/Asis0S23,
  author     = {Kristopher De Asis and Eric Graves and Richard S. Sutton},
  editor     = {Sarath Chandar and Razvan Pascanu and Hanie Sedghi and Doina Precup},
  title      = {Value-aware Importance Weighting for Off-policy Reinforcement Learning},
  booktitle  = {Conference on Lifelong Learning Agents, 22-25 August 2023, McGill
                University, Montr{\'{e}}al, Qu{\'{e}}bec, Canada},
  series     = {Proceedings of Machine Learning Research},
  volume     = {232},
  pages      = {745--763},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v232/de-asis23a.html},
  timestamp  = {Tue, 20 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/collas/Asis0S23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SharifnassabS23,
  author     = {Arsalan Sharifnassab and Richard S. Sutton},
  editor     = {Andreas Krause and Emma Brunskill and Kyunghyun Cho and
                Barbara Engelhardt and Sivan Sabato and Jonathan Scarlett},
  title      = {Toward Efficient Gradient-Based Value Estimation},
  booktitle  = {International Conference on Machine Learning, {ICML} 2023, 23-29 July
                2023, Honolulu, Hawaii, {USA}},
  series     = {Proceedings of Machine Learning Research},
  volume     = {202},
  pages      = {30827--30849},
  publisher  = {{PMLR}},
  year       = {2023},
  url        = {https://proceedings.mlr.press/v202/sharifnassab23a.html},
  timestamp  = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SharifnassabS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-13757,
  author     = {Arsalan Sharifnassab and Richard S. Sutton},
  title      = {Toward Efficient Gradient-Based Value Estimation},
  journal    = {CoRR},
  volume     = {abs/2301.13757},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.13757},
  doi        = {10.48550/ARXIV.2301.13757},
  eprinttype = {arXiv},
  eprint     = {2301.13757},
  timestamp  = {Tue, 07 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-13757.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2302-05326,
  author     = {Khurram Javed and Haseeb Shah and Richard S. Sutton and Martha White},
  title      = {Online Real-Time Recurrent Learning Using Sparse Connections and
                Selective Learning},
  journal    = {CoRR},
  volume     = {abs/2302.05326},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.05326},
  doi        = {10.48550/ARXIV.2302.05326},
  eprinttype = {arXiv},
  eprint     = {2302.05326},
  timestamp  = {Mon, 13 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-05326.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2306-13812,
  author     = {Shibhansh Dohare and J. Fernando Hernandez{-}Garcia and
                Parash Rahman and Richard S. Sutton and A. Rupam Mahmood},
  title      = {Maintaining Plasticity in Deep Continual Learning},
  journal    = {CoRR},
  volume     = {abs/2306.13812},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.13812},
  doi        = {10.48550/ARXIV.2306.13812},
  eprinttype = {arXiv},
  eprint     = {2306.13812},
  timestamp  = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-13812.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2306-15625,
  author     = {Kristopher De Asis and Eric Graves and Richard S. Sutton},
  title      = {Value-aware Importance Weighting for Off-policy Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2306.15625},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.15625},
  doi        = {10.48550/ARXIV.2306.15625},
  eprinttype = {arXiv},
  eprint     = {2306.15625},
  timestamp  = {Fri, 30 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-15625.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2310-01569,
  author     = {Kenny Young and Richard S. Sutton},
  title      = {Iterative Option Discovery for Planning, by Planning},
  journal    = {CoRR},
  volume     = {abs/2310.01569},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.01569},
  doi        = {10.48550/ARXIV.2310.01569},
  eprinttype = {arXiv},
  eprint     = {2310.01569},
  timestamp  = {Thu, 19 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-01569.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2312-15091,
  author     = {Huizhen Yu and Yi Wan and Richard S. Sutton},
  title      = {A Note on Stability in Asynchronous Stochastic Approximation without
                Communication Delays},
  journal    = {CoRR},
  volume     = {abs/2312.15091},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.15091},
  doi        = {10.48550/ARXIV.2312.15091},
  eprinttype = {arXiv},
  eprint     = {2312.15091},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-15091.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/TianYS22,
  author     = {Tian Tian and Kenny Young and Richard S. Sutton},
  editor     = {Sanmi Koyejo and Shakir Mohamed and Alekh Agarwal and
                Danielle Belgrave and Kyunghyun Cho and Alice Oh},
  title      = {Doubly-Asynchronous Value Iteration: Making Value Iteration Asynchronous
                in Actions},
  booktitle  = {Advances in Neural Information Processing Systems 35: Annual Conference
                on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                LA, USA, November 28 - December 9, 2022},
  year       = {2022},
  url        = {http://papers.nips.cc/paper\_files/paper/2022/hash/24e4e3234178a836b70e0aa48827e0ff-Abstract-Conference.html},
  timestamp  = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/TianYS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-03466,
  author     = {Richard S. Sutton and Marlos C. Machado and G. Zacharias Holland and
                David Szepesvari and Finbarr Timbers and Brian Tanner and Adam White},
  title      = {Reward-Respecting Subtasks for Model-Based Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2202.03466},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.03466},
  eprinttype = {arXiv},
  eprint     = {2202.03466},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-03466.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-09701,
  author     = {Richard S. Sutton},
  title      = {A History of Meta-gradient: Gradient Methods for Meta-learning},
  journal    = {CoRR},
  volume     = {abs/2202.09701},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.09701},
  eprinttype = {arXiv},
  eprint     = {2202.09701},
  timestamp  = {Tue, 01 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-09701.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-13252,
  author     = {Richard S. Sutton},
  title      = {The Quest for a Common Model of the Intelligent Decision Maker},
  journal    = {CoRR},
  volume     = {abs/2202.13252},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.13252},
  eprinttype = {arXiv},
  eprint     = {2202.13252},
  timestamp  = {Wed, 02 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-13252.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2205-12515,
  author     = {Yi Wan and Richard S. Sutton},
  title      = {Toward Discovering Options that Achieve Faster Planning},
  journal    = {CoRR},
  volume     = {abs/2205.12515},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2205.12515},
  doi        = {10.48550/ARXIV.2205.12515},
  eprinttype = {arXiv},
  eprint     = {2205.12515},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2205-12515.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2207-01613,
  author     = {Tian Tian and Kenny Young and Richard S. Sutton},
  title      = {Doubly-Asynchronous Value Iteration: Making Value Iteration Asynchronous
                in Actions},
  journal    = {CoRR},
  volume     = {abs/2207.01613},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2207.01613},
  doi        = {10.48550/ARXIV.2207.01613},
  eprinttype = {arXiv},
  eprint     = {2207.01613},
  timestamp  = {Wed, 06 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-01613.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2208-11173,
  author     = {Richard S. Sutton and Michael H. Bowling and Patrick M. Pilarski},
  title      = {The {Alberta} Plan for {AI} Research},
  journal    = {CoRR},
  volume     = {abs/2208.11173},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2208.11173},
  doi        = {10.48550/ARXIV.2208.11173},
  eprinttype = {arXiv},
  eprint     = {2208.11173},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2208-11173.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2209-15141,
  author     = {Yi Wan and Richard S. Sutton},
  title      = {On Convergence of Average-Reward Off-Policy Control Algorithms in
                Weakly-Communicating {MDPs}},
  journal    = {CoRR},
  volume     = {abs/2209.15141},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.15141},
  doi        = {10.48550/ARXIV.2209.15141},
  eprinttype = {arXiv},
  eprint     = {2209.15141},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-15141.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2210-14361,
  author     = {Banafsheh Rafiee and Sina Ghiassian and Jun Jin and
                Richard S. Sutton and Jun Luo and Adam White},
  title      = {Auxiliary task discovery through generate-and-test},
  journal    = {CoRR},
  volume     = {abs/2210.14361},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.14361},
  doi        = {10.48550/ARXIV.2210.14361},
  eprinttype = {arXiv},
  eprint     = {2210.14361},
  timestamp  = {Thu, 12 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-14361.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ai/SilverSPS21,
  author     = {David Silver and Satinder Singh and Doina Precup and Richard S. Sutton},
  title      = {Reward is enough},
  journal    = {Artif. Intell.},
  volume     = {299},
  pages      = {103535},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.artint.2021.103535},
  doi        = {10.1016/J.ARTINT.2021.103535},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ai/SilverSPS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/automatica/LeeS21,
  author     = {Jae Young Lee and Richard S. Sutton},
  title      = {Policy iterations for reinforcement learning problems in continuous
                time and space --- Fundamental theory and methods},
  journal    = {Autom.},
  volume     = {126},
  pages      = {109421},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.automatica.2020.109421},
  doi        = {10.1016/J.AUTOMATICA.2020.109421},
  timestamp  = {Thu, 18 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/automatica/LeeS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tsmc/BartoSA21,
  author     = {Andrew G. Barto and Richard S. Sutton and Charles W. Anderson},
  title      = {Looking Back on the Actor-Critic Architecture},
  journal    = {{IEEE} Trans. Syst. Man Cybern. Syst.},
  volume     = {51},
  number     = {1},
  pages      = {40--50},
  year       = {2021},
  url        = {https://doi.org/10.1109/TSMC.2020.3041775},
  doi        = {10.1109/TSMC.2020.3041775},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/tsmc/BartoSA21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/WanNS21,
  author     = {Yi Wan and Abhishek Naik and Richard S. Sutton},
  editor     = {Marina Meila and Tong Zhang},
  title      = {Learning and Planning in Average-Reward Markov Decision Processes},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning,
                {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {10653--10662},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/wan21a.html},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/WanNS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/ZhangWSW21,
  author     = {Shangtong Zhang and Yi Wan and Richard S. Sutton and Shimon Whiteson},
  editor     = {Marina Meila and Tong Zhang},
  title      = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning,
                {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {12578--12588},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/zhang21u.html},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/ZhangWSW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WanNS21,
  author     = {Yi Wan and Abhishek Naik and Richard S. Sutton},
  editor     = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and
                Percy Liang and Jennifer Wortman Vaughan},
  title      = {Average-Reward Learning and Planning with Options},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference
                on Neural Information Processing Systems 2021, NeurIPS 2021, December
                6-14, 2021, virtual},
  pages      = {22758--22769},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/c058f544c737782deacefa532d9add4c-Abstract.html},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/WanNS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2101-02808,
  author     = {Shangtong Zhang and Yi Wan and Richard S. Sutton and Shimon Whiteson},
  title      = {Average-Reward Off-Policy Policy Evaluation with Function Approximation},
  journal    = {CoRR},
  volume     = {abs/2101.02808},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.02808},
  eprinttype = {arXiv},
  eprint     = {2101.02808},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-02808.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2102-07686,
  author     = {Dylan R. Ashley and Sina Ghiassian and Richard S. Sutton},
  title      = {Does Standard Backpropagation Forget Less Catastrophically Than {Adam}?},
  journal    = {CoRR},
  volume     = {abs/2102.07686},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.07686},
  eprinttype = {arXiv},
  eprint     = {2102.07686},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-07686.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2103-05787,
  author     = {Khurram Javed and Martha White and Richard S. Sutton},
  title      = {Scalable Online Recurrent Learning Using Columnar Neural Networks},
  journal    = {CoRR},
  volume     = {abs/2103.05787},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.05787},
  eprinttype = {arXiv},
  eprint     = {2103.05787},
  timestamp  = {Tue, 16 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-05787.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-08543,
  author     = {Katya Kudashkina and Yi Wan and Abhishek Naik and Richard S. Sutton},
  title      = {Planning with Expectation Models for Control},
  journal    = {CoRR},
  volume     = {abs/2104.08543},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.08543},
  eprinttype = {arXiv},
  eprint     = {2104.08543},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-08543.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-00922,
  author     = {Sina Ghiassian and Richard S. Sutton},
  title      = {An Empirical Comparison of Off-policy Prediction Learning Algorithms
                on the Collision Task},
  journal    = {CoRR},
  volume     = {abs/2106.00922},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.00922},
  eprinttype = {arXiv},
  eprint     = {2106.00922},
  timestamp  = {Wed, 09 Jun 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-00922.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2108-06325,
  author     = {Shibhansh Dohare and A. Rupam Mahmood and Richard S. Sutton},
  title      = {Continual Backprop: Stochastic Gradient Descent with Persistent
                Randomness},
  journal    = {CoRR},
  volume     = {abs/2108.06325},
  year       = {2021},
  url        = {https://arxiv.org/abs/2108.06325},
  eprinttype = {arXiv},
  eprint     = {2108.06325},
  timestamp  = {Wed, 18 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2108-06325.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2109-05110,
  author     = {Sina Ghiassian and Richard S. Sutton},
  title      = {An Empirical Comparison of Off-policy Prediction Learning Algorithms
                in the Four Rooms Environment},
  journal    = {CoRR},
  volume     = {abs/2109.05110},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.05110},
  eprinttype = {arXiv},
  eprint     = {2109.05110},
  timestamp  = {Tue, 21 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-05110.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2110-13855,
  author     = {Yi Wan and Abhishek Naik and Richard S. Sutton},
  title      = {Average-Reward Learning and Planning with Options},
  journal    = {CoRR},
  volume     = {abs/2110.13855},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.13855},
  eprinttype = {arXiv},
  eprint     = {2110.13855},
  timestamp  = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-13855.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2112-15236,
  author     = {Amir Samani and Richard S. Sutton},
  title      = {Learning Agent State Online with Recurrent Generate-and-Test},
  journal    = {CoRR},
  volume     = {abs/2112.15236},
  year       = {2021},
  url        = {https://arxiv.org/abs/2112.15236},
  eprinttype = {arXiv},
  eprint     = {2112.15236},
  timestamp  = {Wed, 05 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2112-15236.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jagi/MonettLTBBGBCCS20,
  author     = {Dagmar Monett and Colin W. P. Lewis and Kristinn R. Th{\'{o}}risson and
                Joscha Bach and Gianluca Baldassarre and Giovanni Granato and
                Istvan S. N. Berkeley and Fran{\c{c}}ois Chollet and Matthew Crosby and
                Henry Shevlin and John F. Sowa and John E. Laird and Shane Legg and
                Peter Lindes and Tom{\'{a}}s Mikolov and William J. Rapaport and
                Ra{\'{u}}l Rojas and Marek Rosa and Peter Stone and
                Richard S. Sutton and Roman V. Yampolskiy and Pei Wang and
                Roger C. Schank and Aaron Sloman and Alan F. T. Winfield},
  title      = {Special Issue {``On Defining Artificial Intelligence''} --- Commentaries
                and Author's Response},
  journal    = {J. Artif. Gen. Intell.},
  volume     = {11},
  number     = {2},
  pages      = {1--100},
  year       = {2020},
  url        = {https://doi.org/10.2478/jagi-2020-0003},
  doi        = {10.2478/JAGI-2020-0003},
  timestamp  = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/jagi/MonettLTBBGBCCS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/AsisCPSG20,
  author       = {De Asis, Kristopher and
                  Chan, Alan and
                  Pitis, Silviu and
                  Sutton, Richard S. and
                  Graves, Daniel},
  title        = {Fixed-Horizon Temporal Difference Methods for Stable Reinforcement
                  Learning},
  booktitle    = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2020, The Thirty-Second Innovative Applications of Artificial Intelligence
                  Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational
                  Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA,
                  February 7-12, 2020},
  pages        = {3741--3748},
  publisher    = {{AAAI} Press},
  year         = {2020},
  url          = {https://doi.org/10.1609/aaai.v34i04.5784},
  doi          = {10.1609/AAAI.V34I04.5784},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/AsisCPSG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author       = {Osband, Ian and
                  Doron, Yotam and
                  Hessel, Matteo and
                  Aslanides, John and
                  Sezener, Eren and
                  Saraiva, Andre and
                  McKinney, Katrina and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba and
                  Singh, Satinder and
                  Van Roy, Benjamin and
                  Sutton, Richard S. and
                  Silver, David and
                  van Hasselt, Hado},
  title        = {Behaviour Suite for Reinforcement Learning},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-16318,
  author       = {Wan, Yi and
                  Naik, Abhishek and
                  Sutton, Richard S.},
  title        = {Learning and Planning in Average-Reward {Markov} Decision Processes},
  journal      = {CoRR},
  volume       = {abs/2006.16318},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.16318},
  eprinttype   = {arXiv},
  eprint       = {2006.16318},
  timestamp    = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-16318.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2008-11329,
  author       = {Chan, Alan and
                  De Asis, Kristopher and
                  Sutton, Richard S.},
  title        = {Inverse Policy Evaluation for Value-based Sequential Decision-making},
  journal      = {CoRR},
  volume       = {abs/2008.11329},
  year         = {2020},
  url          = {https://arxiv.org/abs/2008.11329},
  eprinttype   = {arXiv},
  eprint       = {2008.11329},
  timestamp    = {Tue, 08 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2008-11329.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2008-12095,
  author       = {Kudashkina, Katya and
                  Pilarski, Patrick M. and
                  Sutton, Richard S.},
  title        = {Document-editing Assistants and Model-based Reinforcement Learning
                  as a Path to Conversational {AI}},
  journal      = {CoRR},
  volume       = {abs/2008.12095},
  year         = {2020},
  url          = {https://arxiv.org/abs/2008.12095},
  eprinttype   = {arXiv},
  eprint       = {2008.12095},
  timestamp    = {Tue, 15 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2008-12095.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-15268,
  author       = {Young, Kenny and
                  Sutton, Richard S.},
  title        = {Understanding the Pathologies of Approximate Policy Evaluation when
                  Combined with Greedification in Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2010.15268},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.15268},
  eprinttype   = {arXiv},
  eprint       = {2010.15268},
  timestamp    = {Tue, 03 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-15268.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/RafieeGWS19,
  author       = {Rafiee, Banafsheh and
                  Ghiassian, Sina and
                  White, Adam and
                  Sutton, Richard S.},
  editor       = {Elkind, Edith and
                  Veloso, Manuela and
                  Agmon, Noa and
                  Taylor, Matthew E.},
  title        = {Prediction in Intelligence: An Empirical Comparison of Off-policy
                  Algorithms on Robots},
  booktitle    = {Proceedings of the 18th International Conference on Autonomous Agents
                  and MultiAgent Systems, {AAMAS} '19, Montreal, QC, Canada, May 13-17,
                  2019},
  pages        = {332--340},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems},
  year         = {2019},
  url          = {http://dl.acm.org/citation.cfm?id=3331711},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/RafieeGWS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/TianS19,
  author       = {Tian, Tian and
                  Sutton, Richard S.},
  editor       = {El Fallah Seghrouchni, Amal and
                  Sarne, David},
  title        = {Extending Sliding-Step Importance Weighting from Supervised Learning
                  to Reinforcement Learning},
  booktitle    = {Artificial Intelligence. {IJCAI} 2019 International Workshops - Macao,
                  China, August 10-12, 2019, Revised Selected Best Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {12158},
  pages        = {67--82},
  publisher    = {Springer},
  year         = {2019},
  url          = {https://doi.org/10.1007/978-3-030-56150-5\_4},
  doi          = {10.1007/978-3-030-56150-5\_4},
  timestamp    = {Thu, 16 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/TianS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/WanZWWS19,
  author       = {Wan, Yi and
                  Zaheer, Muhammad and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  editor       = {Kraus, Sarit},
  title        = {Planning with Expectation Models},
  booktitle    = {Proceedings of the Twenty-Eighth International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16,
                  2019},
  pages        = {3649--3655},
  publisher    = {ijcai.org},
  year         = {2019},
  url          = {https://doi.org/10.24963/ijcai.2019/506},
  doi          = {10.24963/IJCAI.2019/506},
  timestamp    = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ijcai/WanZWWS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-07510,
  author       = {Hernandez{-}Garcia, J. Fernando and
                  Sutton, Richard S.},
  title        = {Understanding Multi-Step Deep Reinforcement Learning: {A} Systematic
                  Study of the {DQN} Target},
  journal      = {CoRR},
  volume       = {abs/1901.07510},
  year         = {2019},
  url          = {http://arxiv.org/abs/1901.07510},
  eprinttype   = {arXiv},
  eprint       = {1901.07510},
  timestamp    = {Sat, 02 Feb 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-07510.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1903-00194,
  author       = {Gu, Xiang and
                  Ghiassian, Sina and
                  Sutton, Richard S.},
  title        = {Should All Temporal Difference Learning Use Emphasis?},
  journal      = {CoRR},
  volume       = {abs/1903.00194},
  year         = {2019},
  url          = {http://arxiv.org/abs/1903.00194},
  eprinttype   = {arXiv},
  eprint       = {1903.00194},
  timestamp    = {Tue, 31 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-00194.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1903-03252,
  author       = {Kearney, Alexandra and
                  Veeriah, Vivek and
                  Travnik, Jaden B. and
                  Pilarski, Patrick M. and
                  Sutton, Richard S.},
  title        = {Learning Feature Relevance Through Step Size Adaptation in Temporal-Difference
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1903.03252},
  year         = {2019},
  url          = {http://arxiv.org/abs/1903.03252},
  eprinttype   = {arXiv},
  eprint       = {1903.03252},
  timestamp    = {Sun, 31 Mar 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-03252.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1904-01191,
  author       = {Wan, Yi and
                  Zaheer, Muhammad and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Planning with Expectation Models},
  journal      = {CoRR},
  volume       = {abs/1904.01191},
  year         = {2019},
  url          = {http://arxiv.org/abs/1904.01191},
  eprinttype   = {arXiv},
  eprint       = {1904.01191},
  timestamp    = {Thu, 14 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-01191.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1908-03568,
  author       = {Osband, Ian and
                  Doron, Yotam and
                  Hessel, Matteo and
                  Aslanides, John and
                  Sezener, Eren and
                  Saraiva, Andre and
                  McKinney, Katrina and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba and
                  Singh, Satinder and
                  Van Roy, Benjamin and
                  Sutton, Richard S. and
                  Silver, David and
                  van Hasselt, Hado},
  title        = {Behaviour Suite for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1908.03568},
  year         = {2019},
  url          = {http://arxiv.org/abs/1908.03568},
  eprinttype   = {arXiv},
  eprint       = {1908.03568},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-03906,
  author       = {De Asis, Kristopher and
                  Chan, Alan and
                  Pitis, Silviu and
                  Sutton, Richard S. and
                  Graves, Daniel},
  title        = {Fixed-Horizon Temporal Difference Methods for Stable Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1909.03906},
  year         = {2019},
  url          = {http://arxiv.org/abs/1909.03906},
  eprinttype   = {arXiv},
  eprint       = {1909.03906},
  timestamp    = {Tue, 08 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1909-03906.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-02140,
  author       = {Naik, Abhishek and
                  Shariff, Roshan and
                  Yasui, Niko and
                  Sutton, Richard S.},
  title        = {Discounted Reinforcement Learning is Not an Optimization Problem},
  journal      = {CoRR},
  volume       = {abs/1910.02140},
  year         = {2019},
  url          = {http://arxiv.org/abs/1910.02140},
  eprinttype   = {arXiv},
  eprint       = {1910.02140},
  timestamp    = {Wed, 09 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-02140.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-04002,
  author       = {Hernandez{-}Garcia, J. Fernando and
                  Sutton, Richard S.},
  title        = {Learning Sparse Representations Incrementally in Deep Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1912.04002},
  year         = {2019},
  url          = {http://arxiv.org/abs/1912.04002},
  eprinttype   = {arXiv},
  eprint       = {1912.04002},
  timestamp    = {Thu, 02 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1912-04002.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/firai/TravnikMSP18,
  author       = {Travnik, Jaden B. and
                  Mathewson, Kory W. and
                  Sutton, Richard S. and
                  Pilarski, Patrick M.},
  title        = {Reactive Reinforcement Learning in Asynchronous Environments},
  journal      = {Frontiers Robotics {AI}},
  volume       = {5},
  pages        = {79},
  year         = {2018},
  url          = {https://doi.org/10.3389/frobt.2018.00079},
  doi          = {10.3389/FROBT.2018.00079},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/firai/TravnikMSP18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/YuMS18,
  author       = {Yu, Huizhen and
                  Mahmood, Ashique Rupam and
                  Sutton, Richard S.},
  title        = {On Generalized {Bellman} Equations and Temporal-Difference Learning},
  journal      = {J. Mach. Learn. Res.},
  volume       = {19},
  pages        = {48:1--48:49},
  year         = {2018},
  url          = {http://jmlr.org/papers/v19/17-283.html},
  timestamp    = {Wed, 10 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/YuMS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/AsisHHS18,
  author       = {De Asis, Kristopher and
                  Hernandez{-}Garcia, J. Fernando and
                  Holland, G. Zacharias and
                  Sutton, Richard S.},
  editor       = {McIlraith, Sheila A. and
                  Weinberger, Kilian Q.},
  title        = {Multi-Step Reinforcement Learning: {A} Unifying Algorithm},
  booktitle    = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
                  (AAAI-18), the 30th innovative Applications of Artificial Intelligence
                  (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
                  Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
                  2-7, 2018},
  pages        = {2902--2909},
  publisher    = {{AAAI} Press},
  year         = {2018},
  url          = {https://doi.org/10.1609/aaai.v32i1.11631},
  doi          = {10.1609/AAAI.V32I1.11631},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/AsisHHS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/SherstanABYWWS18,
  author       = {Sherstan, Craig and
                  Ashley, Dylan R. and
                  Bennett, Brendan and
                  Young, Kenny and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  editor       = {Globerson, Amir and
                  Silva, Ricardo},
  title        = {Comparing Direct and Indirect Temporal-Difference Methods for Estimating
                  the Variance of the Return},
  booktitle    = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10,
                  2018},
  pages        = {63--72},
  publisher    = {{AUAI} Press},
  year         = {2018},
  url          = {http://auai.org/uai2018/proceedings/papers/35.pdf},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/SherstanABYWWS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/AsisS18,
  author       = {De Asis, Kristopher and
                  Sutton, Richard S.},
  editor       = {Globerson, Amir and
                  Silva, Ricardo},
  title        = {Per-decision Multi-step Temporal Difference Learning with Control
                  Variates},
  booktitle    = {Proceedings of the Thirty-Fourth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2018, Monterey, California, USA, August 6-10,
                  2018},
  pages        = {786--794},
  publisher    = {{AUAI} Press},
  year         = {2018},
  url          = {http://auai.org/uai2018/proceedings/papers/282.pdf},
  timestamp    = {Thu, 12 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/AsisS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1801-08287,
  author       = {Sherstan, Craig and
                  Bennett, Brendan and
                  Young, Kenny and
                  Ashley, Dylan R. and
                  White, Adam and
                  White, Martha and
                  Sutton, Richard S.},
  title        = {Directly Estimating the Variance of the {\(\lambda\)}-Return Using
                  Temporal-Difference Methods},
  journal      = {CoRR},
  volume       = {abs/1801.08287},
  year         = {2018},
  url          = {http://arxiv.org/abs/1801.08287},
  eprinttype   = {arXiv},
  eprint       = {1801.08287},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1801-08287.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1802-06139,
  author       = {Travnik, Jaden B. and
                  Mathewson, Kory W. and
                  Sutton, Richard S. and
                  Pilarski, Patrick M.},
  title        = {Reactive Reinforcement Learning in Asynchronous Environments},
  journal      = {CoRR},
  volume       = {abs/1802.06139},
  year         = {2018},
  url          = {http://arxiv.org/abs/1802.06139},
  eprinttype   = {arXiv},
  eprint       = {1802.06139},
  timestamp    = {Fri, 17 Dec 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1802-06139.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1804-03334,
  author       = {Kearney, Alexandra and
                  Veeriah, Vivek and
                  Travnik, Jaden B. and
                  Sutton, Richard S. and
                  Pilarski, Patrick M.},
  title        = {{TIDBD:} Adapting Temporal-difference Step-sizes Through Stochastic
                  Meta-descent},
  journal      = {CoRR},
  volume       = {abs/1804.03334},
  year         = {2018},
  url          = {http://arxiv.org/abs/1804.03334},
  eprinttype   = {arXiv},
  eprint       = {1804.03334},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-03334.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-07476,
  author       = {Ghiassian, Sina and
                  Yu, Huizhen and
                  Rafiee, Banafsheh and
                  Sutton, Richard S.},
  title        = {Two geometric input transformation methods for fast online reinforcement
                  learning with neural nets},
  journal      = {CoRR},
  volume       = {abs/1805.07476},
  year         = {2018},
  url          = {http://arxiv.org/abs/1805.07476},
  eprinttype   = {arXiv},
  eprint       = {1805.07476},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1805-07476.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-00540,
  author       = {Young, Kenny J. and
                  Sutton, Richard S. and
                  Yang, Shuo},
  title        = {Integrating Episodic Memory into a Reinforcement Learning Agent using
                  Reservoir Sampling},
  journal      = {CoRR},
  volume       = {abs/1806.00540},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.00540},
  eprinttype   = {arXiv},
  eprint       = {1806.00540},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-00540.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1807-01830,
  author       = {De Asis, Kristopher and
                  Sutton, Richard S.},
  title        = {Per-decision Multi-step Temporal Difference Learning with Control
                  Variates},
  journal      = {CoRR},
  volume       = {abs/1807.01830},
  year         = {2018},
  url          = {http://arxiv.org/abs/1807.01830},
  eprinttype   = {arXiv},
  eprint       = {1807.01830},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1807-01830.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-07435,
  author       = {De Asis, Kristopher and
                  Bennett, Brendan and
                  Sutton, Richard S.},
  title        = {Predicting Periodicity with Temporal Difference Learning},
  journal      = {CoRR},
  volume       = {abs/1809.07435},
  year         = {2018},
  url          = {http://arxiv.org/abs/1809.07435},
  eprinttype   = {arXiv},
  eprint       = {1809.07435},
  timestamp    = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1809-07435.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1811-02597,
  author       = {Ghiassian, Sina and
                  Patterson, Andrew and
                  White, Martha and
                  Sutton, Richard S. and
                  White, Adam},
  title        = {Online Off-policy Prediction},
  journal      = {CoRR},
  volume       = {abs/1811.02597},
  year         = {2018},
  url          = {http://arxiv.org/abs/1811.02597},
  eprinttype   = {arXiv},
  eprint       = {1811.02597},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-02597.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ai/YuMS17,
  author       = {Yu, Huizhen and
                  Mahmood, Ashique Rupam and
                  Sutton, Richard S.},
  editor       = {Mouhoub, Malek and
                  Langlais, Philippe},
  title        = {On Generalized {Bellman} Equations and Temporal-Difference Learning},
  booktitle    = {Advances in Artificial Intelligence - 30th Canadian Conference on
                  Artificial Intelligence, Canadian {AI} 2017, Edmonton, AB, Canada,
                  May 16-19, 2017, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {10233},
  pages        = {3--14},
  publisher    = {Springer},
  year         = {2017},
  url          = {https://doi.org/10.1007/978-3-319-57351-9\_1},
  doi          = {10.1007/978-3-319-57351-9\_1},
  timestamp    = {Thu, 28 Sep 2023 12:27:16 +0200},
  biburl       = {https://dblp.org/rec/conf/ai/YuMS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/VeeriahSS17,
  author       = {Veeriah, Vivek and
                  van Seijen, Harm and
                  Sutton, Richard S.},
  editor       = {Larson, Kate and
                  Winikoff, Michael and
                  Das, Sanmay and
                  Durfee, Edmund H.},
  title        = {Forward Actor-Critic for Nonlinear Function Approximation in Reinforcement
                  Learning},
  booktitle    = {Proceedings of the 16th Conference on Autonomous Agents and MultiAgent
                  Systems, {AAMAS} 2017, S{\~{a}}o Paulo, Brazil, May 8-12, 2017},
  pages        = {556--564},
  publisher    = {{ACM}},
  year         = {2017},
  url          = {http://dl.acm.org/citation.cfm?id=3091207},
  timestamp    = {Wed, 27 Sep 2017 07:24:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/VeeriahSS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/pkdd/VeeriahZS17,
  author       = {Veeriah, Vivek and
                  Zhang, Shangtong and
                  Sutton, Richard S.},
  editor       = {Ceci, Michelangelo and
                  Hollm{\'{e}}n, Jaakko and
                  Todorovski, Ljupco and
                  Vens, Celine and
                  Dzeroski, Saso},
  title        = {Crossprop: Learning Representations by Stochastic Meta-Gradient Descent
                  in Neural Networks},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases - European Conference,
                  {ECML} {PKDD} 2017, Skopje, Macedonia, September 18-22, 2017, Proceedings,
                  Part {I}},
  series       = {Lecture Notes in Computer Science},
  volume       = {10534},
  pages        = {445--459},
  publisher    = {Springer},
  year         = {2017},
  url          = {https://doi.org/10.1007/978-3-319-71249-9\_27},
  doi          = {10.1007/978-3-319-71249-9\_27},
  timestamp    = {Tue, 14 May 2019 10:00:47 +0200},
  biburl       = {https://dblp.org/rec/conf/pkdd/VeeriahZS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MahmoodYS17,
  author       = {Mahmood, Ashique Rupam and
                  Yu, Huizhen and
                  Sutton, Richard S.},
  title        = {Multi-step Off-policy Learning Without Importance Sampling Ratios},
  journal      = {CoRR},
  volume       = {abs/1702.03006},
  year         = {2017},
  url          = {http://arxiv.org/abs/1702.03006},
  eprinttype   = {arXiv},
  eprint       = {1702.03006},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/MahmoodYS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/AsisHHS17,
  author       = {De Asis, Kristopher and
                  Hernandez{-}Garcia, J. Fernando and
                  Holland, G. Zacharias and
                  Sutton, Richard S.},
  title        = {Multi-step Reinforcement Learning: {A} Unifying Algorithm},
  journal      = {CoRR},
  volume       = {abs/1703.01327},
  year         = {2017},
  url          = {http://arxiv.org/abs/1703.01327},
  eprinttype   = {arXiv},
  eprint       = {1703.01327},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AsisHHS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/YuMS17,
  author       = {Yu, Huizhen and
                  Mahmood, Ashique Rupam and
                  Sutton, Richard S.},
  title        = {On Generalized {Bellman} Equations and Temporal-Difference Learning},
  journal      = {CoRR},
  volume       = {abs/1704.04463},
  year         = {2017},
  url          = {http://arxiv.org/abs/1704.04463},
  eprinttype   = {arXiv},
  eprint       = {1704.04463},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/YuMS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/LeeS17a,
  author       = {Lee, Jae Young and
                  Sutton, Richard S.},
  title        = {Integral Policy Iterations for Reinforcement Learning Problems in
                  Continuous Time and Space},
  journal      = {CoRR},
  volume       = {abs/1705.03520},
  year         = {2017},
  url          = {http://arxiv.org/abs/1705.03520},
  eprinttype   = {arXiv},
  eprint       = {1705.03520},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/LeeS17a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/WhiteS17,
  author       = {White, Adam and
                  Sutton, Richard S.},
  title        = {{GQ(\(\lambda\))} Quick Reference and Implementation Guide},
  journal      = {CoRR},
  volume       = {abs/1705.03967},
  year         = {2017},
  url          = {http://arxiv.org/abs/1705.03967},
  eprinttype   = {arXiv},
  eprint       = {1705.03967},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/WhiteS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GhiassianRS17,
  author       = {Ghiassian, Sina and
                  Rafiee, Banafsheh and
                  Sutton, Richard S.},
  title        = {A First Empirical Study of Emphatic Temporal Difference Learning},
  journal      = {CoRR},
  volume       = {abs/1705.04185},
  year         = {2017},
  url          = {http://arxiv.org/abs/1705.04185},
  eprinttype   = {arXiv},
  eprint       = {1705.04185},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/GhiassianRS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1711-03676,
  author       = {Pilarski, Patrick M. and
                  Sutton, Richard S. and
                  Mathewson, Kory W. and
                  Sherstan, Craig and
                  Parker, Adam S. R. and
                  Edwards, Ann L.},
  title        = {Communicative Capital for Prosthetic Agents},
  journal      = {CoRR},
  volume       = {abs/1711.03676},
  year         = {2017},
  url          = {http://arxiv.org/abs/1711.03676},
  eprinttype   = {arXiv},
  eprint       = {1711.03676},
  timestamp    = {Fri, 17 Dec 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1711-03676.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1712-01275,
  author       = {Zhang, Shangtong and
                  Sutton, Richard S.},
  title        = {A Deeper Look at Experience Replay},
  journal      = {CoRR},
  volume       = {abs/1712.01275},
  year         = {2017},
  url          = {http://arxiv.org/abs/1712.01275},
  eprinttype   = {arXiv},
  eprint       = {1712.01275},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1712-01275.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/SuttonMW16,
  author       = {Sutton, Richard S. and
                  Mahmood, Ashique Rupam and
                  White, Martha},
  title        = {An Emphatic Approach to the Problem of Off-policy Temporal-Difference
                  Learning},
  journal      = {J. Mach. Learn. Res.},
  volume       = {17},
  pages        = {73:1--73:29},
  year         = {2016},
  url          = {http://jmlr.org/papers/v17/14-488.html},
  timestamp    = {Wed, 10 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/SuttonMW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/SeijenMPMS16,
  author     = {van Seijen, Harm and Mahmood, Ashique Rupam and
                Pilarski, Patrick M. and Machado, Marlos C. and
                Sutton, Richard S.},
  title      = {True Online Temporal-Difference Learning},
  journal    = {J. Mach. Learn. Res.},
  volume     = {17},
  pages      = {145:1--145:40},
  year       = {2016},
  url        = {http://jmlr.org/papers/v17/15-599.html},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jmlr/SeijenMPMS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/VeeriahPS16,
  author     = {Veeriah, Vivek and Pilarski, Patrick M. and
                Sutton, Richard S.},
  title      = {Face valuing: Training user interfaces with facial
                expressions and reinforcement learning},
  journal    = {CoRR},
  volume     = {abs/1606.02807},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.02807},
  eprinttype = {arXiv},
  eprint     = {1606.02807},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/VeeriahPS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MurphyDLMSW16,
  author     = {Murphy, Susan A. and Deng, Yanzhen and Laber, Eric B. and
                Maei, Hamid Reza and Sutton, Richard S. and
                Witkiewitz, Katie},
  title      = {A Batch, Off-Policy, Actor-Critic Algorithm for Optimizing
                the Average Reward},
  journal    = {CoRR},
  volume     = {abs/1607.05047},
  year       = {2016},
  url        = {http://arxiv.org/abs/1607.05047},
  eprinttype = {arXiv},
  eprint     = {1607.05047},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MurphyDLMSW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SuttonV16,
  author     = {Sutton, Richard S. and Veeriah, Vivek},
  title      = {Learning representations through stochastic gradient
                descent in cross-validation error},
  journal    = {CoRR},
  volume     = {abs/1612.02879},
  year       = {2016},
  url        = {http://arxiv.org/abs/1612.02879},
  eprinttype = {arXiv},
  eprint     = {1612.02879},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonV16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/VanseijenS15,
  author       = {Harm van Seijen and
                  Richard S. Sutton},
  editor       = {Francis R. Bach and
                  David M. Blei},
  title        = {A Deeper Look at Planning as Learning from Replay},
  booktitle    = {Proceedings of the 32nd International Conference on Machine Learning,
                  {ICML} 2015, Lille, France, 6-11 July 2015},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {37},
  pages        = {2314--2322},
  publisher    = {JMLR.org},
  year         = {2015},
  url          = {http://proceedings.mlr.press/v37/vanseijen15.html},
  timestamp    = {Thu, 28 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/VanseijenS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/MahmoodS15,
  author     = {Mahmood, Ashique Rupam and Sutton, Richard S.},
  editor     = {Meila, Marina and Heskes, Tom},
  title      = {Off-policy learning based on weighted importance sampling
                with linear computational complexity},
  booktitle  = {Proceedings of the Thirty-First Conference on Uncertainty
                in Artificial Intelligence, {UAI} 2015, July 12-16, 2015,
                Amsterdam, The Netherlands},
  pages      = {552--561},
  publisher  = {{AUAI} Press},
  year       = {2015},
  url        = {http://auai.org/uai2015/proceedings/papers/165.pdf},
  timestamp  = {Thu, 12 Mar 2020 11:31:09 +0100},
  biburl     = {https://dblp.org/rec/conf/uai/MahmoodS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SuttonMW15,
  author     = {Sutton, Richard S. and Mahmood, Ashique Rupam and
                White, Martha},
  title      = {An Emphatic Approach to the Problem of Off-policy
                Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1503.04269},
  year       = {2015},
  url        = {http://arxiv.org/abs/1503.04269},
  eprinttype = {arXiv},
  eprint     = {1503.04269},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonMW15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SuttonT15,
  author     = {Sutton, Richard S. and Tanner, Brian},
  title      = {Temporal-Difference Networks},
  journal    = {CoRR},
  volume     = {abs/1504.05539},
  year       = {2015},
  url        = {http://arxiv.org/abs/1504.05539},
  eprinttype = {arXiv},
  eprint     = {1504.05539},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SuttonT15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SeijenMPS15,
  author       = {Harm van Seijen and
                  Ashique Rupam Mahmood and
                  Patrick M. Pilarski and
                  Richard S. Sutton},
  title        = {An Empirical Evaluation of True Online {TD}({\(\lambda\)})},
  journal      = {CoRR},
  volume       = {abs/1507.00353},
  year         = {2015},
  url          = {http://arxiv.org/abs/1507.00353},
  eprinttype    = {arXiv},
  eprint       = {1507.00353},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/SeijenMPS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/MahmoodYWS15,
  author     = {Mahmood, Ashique Rupam and Yu, Huizhen and
                White, Martha and Sutton, Richard S.},
  title      = {Emphatic Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1507.01569},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.01569},
  eprinttype = {arXiv},
  eprint     = {1507.01569},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MahmoodYWS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/Sutton15,
  author       = {Richard S. Sutton},
  title        = {True Online Emphatic {TD}({\(\lambda\)}): Quick Reference and Implementation
                  Guide},
  journal      = {CoRR},
  volume       = {abs/1507.07147},
  year         = {2015},
  url          = {http://arxiv.org/abs/1507.07147},
  eprinttype    = {arXiv},
  eprint       = {1507.07147},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/Sutton15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HasseltS15,
  author     = {van Hasselt, Hado and Sutton, Richard S.},
  title      = {Learning to Predict Independent of Span},
  journal    = {CoRR},
  volume     = {abs/1508.04582},
  year       = {2015},
  url        = {http://arxiv.org/abs/1508.04582},
  eprinttype = {arXiv},
  eprint     = {1508.04582},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SeijenMPMS15,
  author     = {van Seijen, Harm and Mahmood, Ashique Rupam and
                Pilarski, Patrick M. and Machado, Marlos C. and
                Sutton, Richard S.},
  title      = {True Online Temporal-Difference Learning},
  journal    = {CoRR},
  volume     = {abs/1512.04087},
  year       = {2015},
  url        = {http://arxiv.org/abs/1512.04087},
  eprinttype = {arXiv},
  eprint     = {1512.04087},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/SeijenMPMS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/adb/ModayilWS14,
  author     = {Modayil, Joseph and White, Adam and Sutton, Richard S.},
  title      = {Multi-timescale nexting in a reinforcement learning robot},
  journal    = {Adapt. Behav.},
  volume     = {22},
  number     = {2},
  pages      = {146--160},
  year       = {2014},
  url        = {https://doi.org/10.1177/1059712313511648},
  doi        = {10.1177/1059712313511648},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/adb/ModayilWS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonMPH14,
  author       = {Richard S. Sutton and
                  Ashique Rupam Mahmood and
                  Doina Precup and
                  Hado van Hasselt},
  title        = {A new {Q}({\(\lambda\)}) with interim forward view and {Monte Carlo} equivalence},
  booktitle    = {Proceedings of the 31st International Conference on Machine Learning,
                  {ICML} 2014, Beijing, China, 21-26 June 2014},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {32},
  pages        = {568--576},
  publisher    = {JMLR.org},
  year         = {2014},
  url          = {http://proceedings.mlr.press/v32/sutton14.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SuttonMPH14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SeijenS14,
  author       = {Harm van Seijen and
                  Richard S. Sutton},
  title        = {True Online {TD}({\(\lambda\)})},
  booktitle    = {Proceedings of the 31st International Conference on Machine Learning,
                  {ICML} 2014, Beijing, China, 21-26 June 2014},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {32},
  pages        = {692--700},
  publisher    = {JMLR.org},
  year         = {2014},
  url          = {http://proceedings.mlr.press/v32/seijen14.html},
  timestamp    = {Wed, 29 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SeijenS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/YaoSSMB14,
  author     = {Yao, Hengshuai and Szepesv{\'{a}}ri, Csaba and
                Sutton, Richard S. and Modayil, Joseph and
                Bhatnagar, Shalabh},
  editor     = {Ghahramani, Zoubin and Welling, Max and Cortes, Corinna and
                Lawrence, Neil D. and Weinberger, Kilian Q.},
  title      = {Universal Option Models},
  booktitle  = {Advances in Neural Information Processing Systems 27:
                Annual Conference on Neural Information Processing Systems
                2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages      = {990--998},
  year       = {2014},
  url        = {https://proceedings.neurips.cc/paper/2014/hash/996a7fa078cc36c46d02f9af3bef918b-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/YaoSSMB14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/MahmoodHS14,
  author     = {Mahmood, Ashique Rupam and van Hasselt, Hado and
                Sutton, Richard S.},
  editor     = {Ghahramani, Zoubin and Welling, Max and Cortes, Corinna and
                Lawrence, Neil D. and Weinberger, Kilian Q.},
  title      = {Weighted importance sampling for off-policy learning with
                linear function approximation},
  booktitle  = {Advances in Neural Information Processing Systems 27:
                Annual Conference on Neural Information Processing Systems
                2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages      = {3014--3022},
  year       = {2014},
  url        = {https://proceedings.neurips.cc/paper/2014/hash/be53ee61104935234b174e62a07e53cf-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/MahmoodHS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/HasseltMS14,
  author       = {Hado van Hasselt and
                  Ashique Rupam Mahmood and
                  Richard S. Sutton},
  editor       = {Nevin L. Zhang and
                  Jin Tian},
  title        = {Off-policy {TD}({\(\lambda\)}) with a true online equivalence},
  booktitle    = {Proceedings of the Thirtieth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2014, Quebec City, Quebec, Canada, July 23-27,
                  2014},
  pages        = {330--339},
  publisher    = {{AUAI} Press},
  year         = {2014},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2468\&proceeding\_id=30},
  timestamp    = {Wed, 03 Feb 2021 11:09:27 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/HasseltMS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ram/PilarskiDDCCHS13,
  author     = {Pilarski, Patrick M. and Dawson, Michael Rory and
                Degris, Thomas and Carey, Jason P. and Chan, K. Ming and
                Hebert, Jacqueline S. and Sutton, Richard S.},
  title      = {Adaptive Artificial Limbs: {A} Real-Time Approach to
                Prediction and Anticipation},
  journal    = {{IEEE} Robotics Autom. Mag.},
  volume     = {20},
  number     = {1},
  pages      = {53--64},
  year       = {2013},
  url        = {https://doi.org/10.1109/MRA.2012.2229948},
  doi        = {10.1109/MRA.2012.2229948},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ram/PilarskiDDCCHS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/MahmoodS13,
  author     = {Mahmood, Ashique Rupam and Sutton, Richard S.},
  title      = {Representation Search through Generate and Test},
  booktitle  = {Learning Rich Representations from Low-Level Sensors,
                Papers from the 2013 {AAAI} Workshop, Bellevue, Washington,
                USA, July 15, 2013},
  series     = {{AAAI} Technical Report},
  volume     = {{WS-13-12}},
  publisher  = {{AAAI}},
  year       = {2013},
  url        = {http://www.aaai.org/ocs/index.php/WS/AAAIW13/paper/view/7164},
  timestamp  = {Tue, 05 Sep 2023 08:59:27 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/MahmoodS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aips/SilverSM13,
  author       = {David Silver and
                  Richard S. Sutton and
                  Martin M{\"{u}}ller},
  editor       = {Daniel Borrajo and
                  Subbarao Kambhampati and
                  Angelo Oddi and
                  Simone Fratini},
  title        = {Temporal-Difference Search in Computer {Go}},
  booktitle    = {Proceedings of the Twenty-Third International Conference on Automated
                  Planning and Scheduling, {ICAPS} 2013, Rome, Italy, June 10-14, 2013},
  publisher    = {{AAAI}},
  year         = {2013},
  url          = {http://www.aaai.org/ocs/index.php/ICAPS/ICAPS13/paper/view/6037},
  timestamp    = {Wed, 29 Mar 2017 16:45:27 +0200},
  biburl       = {https://dblp.org/rec/conf/aips/SilverSM13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SeijenS13,
  author     = {van Seijen, Harm and Sutton, Richard S.},
  title      = {Planning by Prioritized Sweeping with Small Backups},
  booktitle  = {Proceedings of the 30th International Conference on
                Machine Learning, {ICML} 2013, Atlanta, GA, USA,
                16-21 June 2013},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {28},
  pages      = {361--369},
  publisher  = {JMLR.org},
  year       = {2013},
  url        = {http://proceedings.mlr.press/v28/vanseijen13.html},
  timestamp  = {Wed, 29 May 2019 08:41:45 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/SeijenS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icorr/PilarskiDS13,
  author     = {Pilarski, Patrick M. and Dick, Travis B. and
                Sutton, Richard S.},
  title      = {Real-time prediction learning for the simultaneous
                actuation of multiple prosthetic joints},
  booktitle  = {{IEEE} 13th International Conference on Rehabilitation
                Robotics, {ICORR} 2013, Seattle, WA, USA, June 24-26, 2013},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2013},
  url        = {https://doi.org/10.1109/ICORR.2013.6650435},
  doi        = {10.1109/ICORR.2013.6650435},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icorr/PilarskiDS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sara/MahmoodS13,
  author     = {Mahmood, Ashique Rupam and Sutton, Richard S.},
  editor     = {Frisch, Alan M. and Gregory, Peter},
  title      = {Position Paper: Representation Search through Generate and Test},
  booktitle  = {Proceedings of the Tenth Symposium on Abstraction,
                Reformulation, and Approximation, {SARA} 2013,
                11-12 July 2013, Leavenworth, Washington, {USA}},
  publisher  = {{AAAI}},
  year       = {2013},
  url        = {http://www.aaai.org/ocs/index.php/SARA/SARA13/paper/view/7255},
  timestamp  = {Tue, 09 Feb 2021 08:32:52 +0100},
  biburl     = {https://dblp.org/rec/conf/sara/MahmoodS13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1301-2343,
  author     = {van Seijen, Harm and Sutton, Richard S.},
  title      = {Planning by Prioritized Sweeping with Small Backups},
  journal    = {CoRR},
  volume     = {abs/1301.2343},
  year       = {2013},
  url        = {http://arxiv.org/abs/1301.2343},
  eprinttype = {arXiv},
  eprint     = {1301.2343},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1301-2343.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/EdwardsKDSP13,
  author     = {Edwards, Ann L. and Kearney, Alexandra and
                Dawson, Michael Rory and Sutton, Richard S. and
                Pilarski, Patrick M.},
  title      = {Temporal-Difference Learning to Assist Human Decision
                Making during the Control of an Artificial Limb},
  journal    = {CoRR},
  volume     = {abs/1309.4714},
  year       = {2013},
  url        = {http://arxiv.org/abs/1309.4714},
  eprinttype = {arXiv},
  eprint     = {1309.4714},
  timestamp  = {Tue, 17 Sep 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/EdwardsKDSP13.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SilverSM12,
  author       = {David Silver and
                  Richard S. Sutton and
                  Martin M{\"{u}}ller},
  title        = {Temporal-difference search in computer {Go}},
  journal      = {Mach. Learn.},
  volume       = {87},
  number       = {2},
  pages        = {183--219},
  year         = {2012},
  url          = {https://doi.org/10.1007/s10994-012-5280-0},
  doi          = {10.1007/S10994-012-5280-0},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/SilverSM12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaaifs/PilarskiS12,
  author     = {Pilarski, Patrick M. and Sutton, Richard S.},
  title      = {Between Instruction and Reward: Human-Prompted Switching},
  booktitle  = {Robots Learning Interactively from Human Teachers, Papers
                from the 2012 {AAAI} Fall Symposium, Arlington, Virginia,
                USA, November 2-4, 2012},
  series     = {{AAAI} Technical Report},
  volume     = {{FS-12-07}},
  publisher  = {{AAAI}},
  year       = {2012},
  url        = {http://www.aaai.org/ocs/index.php/FSS/FSS12/paper/view/5496},
  timestamp  = {Tue, 08 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/aaaifs/PilarskiS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/amcc/DegrisPS12,
  author     = {Degris, Thomas and Pilarski, Patrick M. and
                Sutton, Richard S.},
  title      = {Model-Free reinforcement learning with continuous action in practice},
  booktitle  = {American Control Conference, {ACC} 2012, Montreal, QC,
                Canada, June 27-29, 2012},
  pages      = {2177--2182},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ACC.2012.6315022},
  doi        = {10.1109/ACC.2012.6315022},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/amcc/DegrisPS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/MahmoodSDP12,
  author     = {Mahmood, Ashique Rupam and Sutton, Richard S. and
                Degris, Thomas and Pilarski, Patrick M.},
  title      = {Tuning-free step-size adaptation},
  booktitle  = {2012 {IEEE} International Conference on Acoustics, Speech
                and Signal Processing, {ICASSP} 2012, Kyoto, Japan,
                March 25-30, 2012},
  pages      = {2121--2124},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ICASSP.2012.6288330},
  doi        = {10.1109/ICASSP.2012.6288330},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/MahmoodSDP12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdl-epirob/WhiteMS12,
  author     = {White, Adam and Modayil, Joseph and Sutton, Richard S.},
  title      = {Scaling life-long off-policy learning},
  booktitle  = {2012 {IEEE} International Conference on Development and
                Learning and Epigenetic Robotics, {ICDL-EPIROB} 2012,
                San Diego, CA, USA, November 7-9, 2012},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/DevLrn.2012.6400860},
  doi        = {10.1109/DEVLRN.2012.6400860},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icdl-epirob/WhiteMS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/DegrisWS12,
  author     = {Degris, Thomas and White, Martha and Sutton, Richard S.},
  title      = {Linear Off-Policy Actor-Critic},
  booktitle  = {Proceedings of the 29th International Conference on
                Machine Learning, {ICML} 2012, Edinburgh, Scotland, UK,
                June 26 - July 1, 2012},
  publisher  = {icml.cc / Omnipress},
  year       = {2012},
  url        = {http://icml.cc/2012/papers/268.pdf},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/DegrisWS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sab/ModayilWS12,
  author     = {Modayil, Joseph and White, Adam and Sutton, Richard S.},
  editor     = {Ziemke, Tom and Balkenius, Christian and Hallam, John},
  title      = {Multi-timescale Nexting in a Reinforcement Learning Robot},
  booktitle  = {From Animals to Animats 12 - 12th International Conference
                on Simulation of Adaptive Behavior, {SAB} 2012, Odense,
                Denmark, August 27-30, 2012. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {7426},
  pages      = {299--309},
  publisher  = {Springer},
  year       = {2012},
  url        = {https://doi.org/10.1007/978-3-642-33093-3\_30},
  doi        = {10.1007/978-3-642-33093-3\_30},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sab/ModayilWS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/smc/ModayilWPS12,
  author     = {Modayil, Joseph and White, Adam and
                Pilarski, Patrick M. and Sutton, Richard S.},
  title      = {Acquiring a broad range of empirical knowledge in real
                time by temporal-difference learning},
  booktitle  = {Proceedings of the {IEEE} International Conference on
                Systems, Man, and Cybernetics, {SMC} 2012, Seoul,
                Korea (South), October 14-17, 2012},
  pages      = {1903--1910},
  publisher  = {{IEEE}},
  year       = {2012},
  url        = {https://doi.org/10.1109/ICSMC.2012.6378016},
  doi        = {10.1109/ICSMC.2012.6378016},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/smc/ModayilWPS12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1205-4839,
  author     = {Degris, Thomas and White, Martha and Sutton, Richard S.},
  title      = {Off-Policy Actor-Critic},
  journal    = {CoRR},
  volume     = {abs/1205.4839},
  year       = {2012},
  url        = {http://arxiv.org/abs/1205.4839},
  eprinttype = {arXiv},
  eprint     = {1205.4839},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1205-4839.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1206-3285,
  author     = {Sutton, Richard S. and Szepesv{\'{a}}ri, Csaba and
                Geramifard, Alborz and Bowling, Michael},
  title      = {Dyna-Style Planning with Linear Function Approximation and
                Prioritized Sweeping},
  journal    = {CoRR},
  volume     = {abs/1206.3285},
  year       = {2012},
  url        = {http://arxiv.org/abs/1206.3285},
  eprinttype = {arXiv},
  eprint     = {1206.3285},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1206-3285.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1206-6262,
  author     = {White, Adam and Modayil, Joseph and Sutton, Richard S.},
  title      = {Scaling Life-long Off-policy Learning},
  journal    = {CoRR},
  volume     = {abs/1206.6262},
  year       = {2012},
  url        = {http://arxiv.org/abs/1206.6262},
  eprinttype = {arXiv},
  eprint     = {1206.6262},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1206-6262.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/atal/SuttonMDDPWP11,
  author       = {Richard S. Sutton and
                  Joseph Modayil and
                  Michael Delp and
                  Thomas Degris and
                  Patrick M. Pilarski and
                  Adam White and
                  Doina Precup},
  editor       = {Liz Sonenberg and
                  Peter Stone and
                  Kagan Tumer and
                  Pinar Yolum},
  title        = {Horde: a scalable real-time architecture for learning knowledge from
                  unsupervised sensorimotor interaction},
  booktitle    = {10th International Conference on Autonomous Agents and Multiagent
                  Systems {(AAMAS} 2011), Taipei, Taiwan, May 2-6, 2011, Volume 1-3},
  pages        = {761--768},
  publisher    = {{IFAAMAS}},
  year         = {2011},
  url          = {http://portal.acm.org/citation.cfm?id=2031726},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/SuttonMDDPWP11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ilp/Sutton12,
  author     = {Sutton, Richard S.},
  editor     = {Muggleton, Stephen H. and Tamaddoni{-}Nezhad, Alireza and
                Lisi, Francesca A.},
  title      = {Beyond Reward: The Problem of Knowledge and Data},
  booktitle  = {Inductive Logic Programming - 21st International
                Conference, {ILP} 2011, Windsor Great Park, UK,
                July 31 - August 3, 2011, Revised Selected Papers},
  series     = {Lecture Notes in Computer Science},
  volume     = {7207},
  pages      = {2--6},
  publisher  = {Springer},
  year       = {2011},
  url        = {https://doi.org/10.1007/978-3-642-31951-8\_2},
  doi        = {10.1007/978-3-642-31951-8\_2},
  timestamp  = {Sat, 30 Apr 2022 12:43:31 +0200},
  biburl     = {https://dblp.org/rec/conf/ilp/Sutton12.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1112-1133,
  author     = {Modayil, Joseph and White, Adam and Sutton, Richard S.},
  title      = {Multi-timescale Nexting in a Reinforcement Learning Robot},
  journal    = {CoRR},
  volume     = {abs/1112.1133},
  year       = {2011},
  url        = {http://arxiv.org/abs/1112.1133},
  eprinttype = {arXiv},
  eprint     = {1112.1133},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1112-1133.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/MaeiSBS10,
  author     = {Maei, Hamid Reza and Szepesv{\'{a}}ri, Csaba and
                Bhatnagar, Shalabh and Sutton, Richard S.},
  editor     = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten},
  title      = {Toward Off-Policy Learning Control with Function Approximation},
  booktitle  = {Proceedings of the 27th International Conference on
                Machine Learning (ICML-10), June 21-24, 2010, Haifa, Israel},
  pages      = {719--726},
  publisher  = {Omnipress},
  year       = {2010},
  url        = {https://icml.cc/Conferences/2010/papers/627.pdf},
  timestamp  = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/MaeiSBS10.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/automatica/BhatnagarSGL09,
  author    = {Bhatnagar, Shalabh and Sutton, Richard S. and Ghavamzadeh, Mohammad and
               Lee, Mark},
  title     = {Natural actor-critic algorithms},
  journal   = {Autom.},
  volume    = {45},
  number    = {11},
  pages     = {2471--2482},
  year      = {2009},
  url       = {https://doi.org/10.1016/j.automatica.2009.07.008},
  doi       = {10.1016/J.AUTOMATICA.2009.07.008},
  timestamp = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/automatica/BhatnagarSGL09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/SuttonMPBSSW09,
  author    = {Sutton, Richard S. and Maei, Hamid Reza and Precup, Doina and
               Bhatnagar, Shalabh and Silver, David and Szepesv{\'{a}}ri, Csaba and
               Wiewiora, Eric},
  editor    = {Danyluk, Andrea Pohoreckyj and Bottou, L{\'{e}}on and Littman, Michael L.},
  title     = {Fast gradient-descent methods for temporal-difference learning with linear
               function approximation},
  booktitle = {Proceedings of the 26th Annual International Conference on Machine Learning,
               {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {382},
  pages     = {993--1000},
  publisher = {{ACM}},
  year      = {2009},
  url       = {https://doi.org/10.1145/1553374.1553501},
  doi       = {10.1145/1553374.1553501},
  timestamp = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonMPBSSW09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/MaeiSBPSS09,
  author    = {Maei, Hamid Reza and Szepesv{\'{a}}ri, Csaba and Bhatnagar, Shalabh and
               Precup, Doina and Silver, David and Sutton, Richard S.},
  editor    = {Bengio, Yoshua and Schuurmans, Dale and Lafferty, John D. and
               Williams, Christopher K. I. and Culotta, Aron},
  title     = {Convergent Temporal-Difference Learning with Arbitrary Smooth Function
               Approximation},
  booktitle = {Advances in Neural Information Processing Systems 22: 23rd Annual Conference
               on Neural Information Processing Systems 2009. Proceedings of a meeting held
               7-10 December 2009, Vancouver, British Columbia, Canada},
  pages     = {1204--1212},
  publisher = {Curran Associates, Inc.},
  year      = {2009},
  url       = {https://proceedings.neurips.cc/paper/2009/hash/3a15c7d0bbe60300a39f76f8a5ba6896-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/MaeiSBPSS09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/YaoSBDS09,
  author       = {Hengshuai Yao and
                  Richard S. Sutton and
                  Shalabh Bhatnagar and
                  Diao Dongcui and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Yoshua Bengio and
                  Dale Schuurmans and
                  John D. Lafferty and
                  Christopher K. I. Williams and
                  Aron Culotta},
  title        = {Multi-Step {Dyna} Planning for Policy Evaluation and Control},
  booktitle    = {Advances in Neural Information Processing Systems 22: 23rd Annual
                  Conference on Neural Information Processing Systems 2009. Proceedings
                  of a meeting held 7-10 December 2009, Vancouver, British Columbia,
                  Canada},
  pages        = {2187--2195},
  publisher    = {Curran Associates, Inc.},
  year         = {2009},
  url          = {https://proceedings.neurips.cc/paper/2009/hash/c52f1bd66cc19d05628bd8bf27af3ad6-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/YaoSBDS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/neco/LudvigSK08,
  author    = {Ludvig, Elliot A. and Sutton, Richard S. and Kehoe, E. James},
  title     = {Stimulus Representation and the Timing of Reward-Prediction Errors in Models
               of the Dopamine System},
  journal   = {Neural Comput.},
  volume    = {20},
  number    = {12},
  pages     = {3034--3054},
  year      = {2008},
  url       = {https://doi.org/10.1162/neco.2008.11-07-654},
  doi       = {10.1162/NECO.2008.11-07-654},
  timestamp = {Tue, 01 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/neco/LudvigSK08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aiide/CutumisuSBS08,
  author    = {Cutumisu, Maria and Szafron, Duane and Bowling, Michael H. and
               Sutton, Richard S.},
  editor    = {Darken, Christian and Mateas, Michael},
  title     = {Agent Learning using Action-Dependent Learning Rates in Computer
               Role-Playing Games},
  booktitle = {Proceedings of the Fourth Artificial Intelligence and Interactive Digital
               Entertainment Conference, October 22-24, 2008, Stanford, California, {USA}},
  publisher = {The {AAAI} Press},
  year      = {2008},
  url       = {http://www.aaai.org/Library/AIIDE/2008/aiide08-004.php},
  timestamp = {Wed, 10 Feb 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/aiide/CutumisuSBS08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/SilverSM08,
  author    = {Silver, David and Sutton, Richard S. and M{\"{u}}ller, Martin},
  editor    = {Cohen, William W. and McCallum, Andrew and Roweis, Sam T.},
  title     = {Sample-based learning and search with permanent and transient memories},
  booktitle = {Machine Learning, Proceedings of the Twenty-Fifth International Conference
               {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {307},
  pages     = {968--975},
  publisher = {{ACM}},
  year      = {2008},
  url       = {https://doi.org/10.1145/1390156.1390278},
  doi       = {10.1145/1390156.1390278},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/SilverSM08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/LudvigSVK08,
  author    = {Ludvig, Elliot A. and Sutton, Richard S. and Verbeek, Eric and
               Kehoe, E. James},
  editor    = {Koller, Daphne and Schuurmans, Dale and Bengio, Yoshua and
               Bottou, L{\'{e}}on},
  title     = {A computational model of hippocampal function in trace conditioning},
  booktitle = {Advances in Neural Information Processing Systems 21, Proceedings of the
               Twenty-Second Annual Conference on Neural Information Processing Systems,
               Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages     = {993--1000},
  publisher = {Curran Associates, Inc.},
  year      = {2008},
  url       = {https://proceedings.neurips.cc/paper/2008/hash/d709f38ef758b5066ef31b18039b8ce5-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/LudvigSVK08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/SuttonSM08,
  author       = {Richard S. Sutton and
                  Csaba Szepesv{\'{a}}ri and
                  Hamid Reza Maei},
  editor       = {Daphne Koller and
                  Dale Schuurmans and
                  Yoshua Bengio and
                  L{\'{e}}on Bottou},
  title        = {A Convergent {$O(n)$} Temporal-difference Algorithm for Off-policy Learning
                  with Linear Function Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 21, Proceedings
                  of the Twenty-Second Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages        = {1609--1616},
  publisher    = {Curran Associates, Inc.},
  year         = {2008},
  url          = {https://proceedings.neurips.cc/paper/2008/hash/e0c641195b27425bb056ac56f8953d24-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/SuttonSM08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/uai/SuttonSGB08,
  author       = {Richard S. Sutton and
                  Csaba Szepesv{\'{a}}ri and
                  Alborz Geramifard and
                  Michael H. Bowling},
  editor       = {David A. McAllester and
                  Petri Myllym{\"{a}}ki},
  title        = {Dyna-Style Planning with Linear Function Approximation and Prioritized
                  Sweeping},
  booktitle    = {{UAI} 2008, Proceedings of the 24th Conference in Uncertainty in Artificial
                  Intelligence, Helsinki, Finland, July 9-12, 2008},
  pages        = {528--536},
  publisher    = {{AUAI} Press},
  year         = {2008},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1971\&proceeding\_id=24},
  timestamp    = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/SuttonSGB08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonKS07,
  author    = {Sutton, Richard S. and Koop, Anna and Silver, David},
  editor    = {Ghahramani, Zoubin},
  title     = {On the role of tracking in stationary environments},
  booktitle = {Machine Learning, Proceedings of the Twenty-Fourth International Conference
               {(ICML} 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {227},
  pages     = {871--878},
  publisher = {{ACM}},
  year      = {2007},
  url       = {https://doi.org/10.1145/1273496.1273606},
  doi       = {10.1145/1273496.1273606},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonKS07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcai/SilverSM07,
  author       = {David Silver and
                  Richard S. Sutton and
                  Martin M{\"{u}}ller},
  editor       = {Manuela M. Veloso},
  title        = {Reinforcement Learning of Local Shape in the Game of {Go}},
  booktitle    = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference
                  on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages        = {1053--1058},
  year         = {2007},
  url          = {http://ijcai.org/Proceedings/07/Papers/170.pdf},
  timestamp    = {Tue, 20 Aug 2019 16:17:11 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/SilverSM07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BhatnagarSGL07,
  author    = {Bhatnagar, Shalabh and Sutton, Richard S. and Ghavamzadeh, Mohammad and
               Lee, Mark},
  editor    = {Platt, John C. and Koller, Daphne and Singer, Yoram and Roweis, Sam T.},
  title     = {Incremental Natural Actor-Critic Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 20, Proceedings of the
               Twenty-First Annual Conference on Neural Information Processing Systems,
               Vancouver, British Columbia, Canada, December 3-6, 2007},
  pages     = {105--112},
  publisher = {Curran Associates, Inc.},
  year      = {2007},
  url       = {https://proceedings.neurips.cc/paper/2007/hash/6883966fd8f918a4aa29be29d2c386fb-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/BhatnagarSGL07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/GeramifardBS06,
  author    = {Geramifard, Alborz and Bowling, Michael H. and Sutton, Richard S.},
  title     = {Incremental Least-Squares Temporal Difference Learning},
  booktitle = {Proceedings, The Twenty-First National Conference on Artificial Intelligence
               and the Eighteenth Innovative Applications of Artificial Intelligence
               Conference, July 16-20, 2006, Boston, Massachusetts, {USA}},
  pages     = {356--361},
  publisher = {{AAAI} Press},
  year      = {2006},
  url       = {http://www.aaai.org/Library/AAAI/2006/aaai06-057.php},
  timestamp = {Tue, 05 Sep 2023 09:10:47 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/GeramifardBS06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/GeramifardBZS06,
  author       = {Alborz Geramifard and
                  Michael H. Bowling and
                  Martin Zinkevich and
                  Richard S. Sutton},
  editor       = {Bernhard Sch{\"{o}}lkopf and
                  John C. Platt and
                  Thomas Hofmann},
  title        = {{iLSTD}: Eligibility Traces and Convergence Analysis},
  booktitle    = {Advances in Neural Information Processing Systems 19, Proceedings
                  of the Twentieth Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 4-7, 2006},
  pages        = {441--448},
  publisher    = {{MIT} Press},
  year         = {2006},
  url          = {https://proceedings.neurips.cc/paper/2006/hash/6ad4174eba19ecb5fed17411a34ff5e6-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/GeramifardBZS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/adb/StoneSK05,
  author       = {Peter Stone and
                  Richard S. Sutton and
                  Gregory Kuhlmann},
  title        = {Reinforcement Learning for {RoboCup} Soccer Keepaway},
  journal      = {Adapt. Behav.},
  volume       = {13},
  number       = {3},
  pages        = {165--188},
  year         = {2005},
  url          = {https://doi.org/10.1177/105971230501300301},
  doi          = {10.1177/105971230501300301},
  timestamp    = {Tue, 25 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/adb/StoneSK05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/TannerS05,
  author       = {Brian Tanner and
                  Richard S. Sutton},
  editor       = {Luc {De Raedt} and
                  Stefan Wrobel},
  title        = {{TD($\lambda$)} networks: temporal-difference networks with eligibility
                  traces},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Second International Conference
                  {(ICML} 2005), Bonn, Germany, August 7-11, 2005},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {119},
  pages        = {888--895},
  publisher    = {{ACM}},
  year         = {2005},
  url          = {https://doi.org/10.1145/1102351.1102463},
  doi          = {10.1145/1102351.1102463},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/TannerS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/RafolsRST05,
  author    = {Rafols, Eddie J. and Ring, Mark B. and Sutton, Richard S. and Tanner, Brian},
  editor    = {Kaelbling, Leslie Pack and Saffiotti, Alessandro},
  title     = {Using Predictive Representations to Improve Generalization in Reinforcement
               Learning},
  booktitle = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on
               Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages     = {835--840},
  publisher = {Professional Book Center},
  year      = {2005},
  url       = {http://ijcai.org/Proceedings/05/Papers/1650.pdf},
  timestamp = {Tue, 20 Aug 2019 16:16:29 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/RafolsRST05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcai/TannerS05,
  author    = {Tanner, Brian and Sutton, Richard S.},
  editor    = {Kaelbling, Leslie Pack and Saffiotti, Alessandro},
  title     = {Temporal-Difference Networks with History},
  booktitle = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on
               Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages     = {865--870},
  publisher = {Professional Book Center},
  year      = {2005},
  url       = {http://ijcai.org/Proceedings/05/Papers/1640.pdf},
  timestamp = {Tue, 20 Aug 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/TannerS05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/PrecupSPKS05,
  author    = {Precup, Doina and Sutton, Richard S. and Paduraru, Cosmin and Koop, Anna and
               Singh, Satinder},
  title     = {Off-policy Learning with Options and Recognizers},
  booktitle = {Advances in Neural Information Processing Systems 18 [Neural Information
               Processing Systems, {NIPS} 2005, December 5-8, 2005, Vancouver, British
               Columbia, Canada]},
  pages     = {1097--1104},
  year      = {2005},
  url       = {https://proceedings.neurips.cc/paper/2005/hash/f75526659f31040afeb61cb7133e4e6d-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/PrecupSPKS05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/SuttonRK05,
  author    = {Sutton, Richard S. and Rafols, Eddie J. and Koop, Anna},
  title     = {Temporal Abstraction in Temporal-difference Networks},
  booktitle = {Advances in Neural Information Processing Systems 18 [Neural Information
               Processing Systems, {NIPS} 2005, December 5-8, 2005, Vancouver, British
               Columbia, Canada]},
  pages     = {1313--1320},
  year      = {2005},
  url       = {https://proceedings.neurips.cc/paper/2005/hash/12311d05c9aa67765703984239511212-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonRK05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/SuttonT04,
  author    = {Sutton, Richard S. and Tanner, Brian},
  title     = {Temporal-Difference Networks},
  booktitle = {Advances in Neural Information Processing Systems 17 [Neural Information
               Processing Systems, {NIPS} 2004, December 13-18, 2004, Vancouver, British
               Columbia, Canada]},
  pages     = {1377--1384},
  year      = {2004},
  url       = {https://proceedings.neurips.cc/paper/2004/hash/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonT04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@proceedings{DBLP:conf/aaai/2002,
  editor    = {Dechter, Rina and Kearns, Michael J. and Sutton, Richard S.},
  title     = {Proceedings of the Eighteenth National Conference on Artificial Intelligence
               and Fourteenth Conference on Innovative Applications of Artificial
               Intelligence, July 28 - August 1, 2002, Edmonton, Alberta, Canada},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2002},
  url       = {http://www.aaai.org/Conferences/AAAI/aaai02.php},
  timestamp = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/2002.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/PrecupSD01,
  author    = {Precup, Doina and Sutton, Richard S. and Dasgupta, Sanjoy},
  editor    = {Brodley, Carla E. and Danyluk, Andrea Pohoreckyj},
  title     = {Off-Policy Temporal Difference Learning with Function Approximation},
  booktitle = {Proceedings of the Eighteenth International Conference on Machine Learning
               {(ICML} 2001), Williams College, Williamstown, MA, USA, June 28 - July 1,
               2001},
  pages     = {417--424},
  publisher = {Morgan Kaufmann},
  year      = {2001},
  timestamp = {Wed, 27 Nov 2002 10:53:35 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/PrecupSD01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/StoneS01,
  author       = {Peter Stone and
                  Richard S. Sutton},
  editor       = {Carla E. Brodley and
                  Andrea Pohoreckyj Danyluk},
  title        = {Scaling Reinforcement Learning toward {RoboCup} Soccer},
  booktitle    = {Proceedings of the Eighteenth International Conference on Machine
                  Learning {(ICML} 2001), Williams College, Williamstown, MA, USA, June
                  28 - July 1, 2001},
  pages        = {537--544},
  publisher    = {Morgan Kaufmann},
  year         = {2001},
  timestamp    = {Wed, 27 Nov 2002 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/StoneS01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/LittmanSS01,
  author    = {Littman, Michael L. and Sutton, Richard S. and Singh, Satinder},
  editor    = {Dietterich, Thomas G. and Becker, Suzanna and Ghahramani, Zoubin},
  title     = {Predictive Representations of State},
  booktitle = {Advances in Neural Information Processing Systems 14 [Neural Information
               Processing Systems: Natural and Synthetic, {NIPS} 2001, December 3-8, 2001,
               Vancouver, British Columbia, Canada]},
  pages     = {1555--1561},
  publisher = {{MIT} Press},
  year      = {2001},
  url       = {https://proceedings.neurips.cc/paper/2001/hash/1e4d36177d71bbb3558e43af9577d70e-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/LittmanSS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/robocup/StoneS01,
  author    = {Stone, Peter and Sutton, Richard S.},
  editor    = {Birk, Andreas and Coradeschi, Silvia and Tadokoro, Satoshi},
  title     = {Keepaway Soccer: {A} Machine Learning Testbed},
  booktitle = {RoboCup 2001: Robot Soccer World Cup {V}},
  series    = {Lecture Notes in Computer Science},
  volume    = {2377},
  pages     = {214--223},
  publisher = {Springer},
  year      = {2001},
  url       = {https://doi.org/10.1007/3-540-45603-1\_22},
  doi       = {10.1007/3-540-45603-1\_22},
  timestamp = {Tue, 14 May 2019 10:00:53 +0200},
  biburl    = {https://dblp.org/rec/conf/robocup/StoneS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/PrecupSS00,
  author    = {Precup, Doina and Sutton, Richard S. and Singh, Satinder},
  editor    = {Langley, Pat},
  title     = {Eligibility Traces for Off-Policy Policy Evaluation},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning
               {(ICML} 2000), Stanford University, Stanford, CA, USA, June 29 - July 2,
               2000},
  pages     = {759--766},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/PrecupSS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/robocup/StoneSS00,
  author    = {Stone, Peter and Sutton, Richard S. and Singh, Satinder},
  editor    = {Stone, Peter and Balch, Tucker R. and Kraetzschmar, Gerhard K.},
  title     = {Reinforcement Learning for 3 vs. 2 Keepaway},
  booktitle = {RoboCup 2000: Robot Soccer World Cup {IV}},
  series    = {Lecture Notes in Computer Science},
  volume    = {2019},
  pages     = {249--258},
  publisher = {Springer},
  year      = {2000},
  url       = {https://doi.org/10.1007/3-540-45324-5\_23},
  doi       = {10.1007/3-540-45324-5\_23},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/robocup/StoneSS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ai/SuttonPS99,
  author       = {Richard S. Sutton and
                  Doina Precup and
                  Satinder Singh},
  title        = {Between {MDPs} and {Semi-MDPs}: {A} Framework for Temporal Abstraction
                  in Reinforcement Learning},
  journal      = {Artif. Intell.},
  volume       = {112},
  number       = {1-2},
  pages        = {181--211},
  year         = {1999},
  url          = {https://doi.org/10.1016/S0004-3702(99)00052-1},
  doi          = {10.1016/S0004-3702(99)00052-1},
  timestamp    = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ai/SuttonPS99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eurocolt/Sutton99,
  author    = {Sutton, Richard S.},
  editor    = {Fischer, Paul and Simon, Hans Ulrich},
  title     = {Open Theoretical Questions in Reinforcement Learning},
  booktitle = {Computational Learning Theory, 4th European Conference, EuroCOLT '99,
               Nordkirchen, Germany, March 29-31, 1999, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1572},
  pages     = {11--17},
  publisher = {Springer},
  year      = {1999},
  url       = {https://doi.org/10.1007/3-540-49097-3\_2},
  doi       = {10.1007/3-540-49097-3\_2},
  timestamp = {Fri, 08 Sep 2023 21:18:19 +0200},
  biburl    = {https://dblp.org/rec/conf/eurocolt/Sutton99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/SuttonMSM99,
  author    = {Sutton, Richard S. and McAllester, David A. and Singh, Satinder and
               Mansour, Yishay},
  editor    = {Solla, Sara A. and Leen, Todd K. and M{\"{u}}ller, Klaus{-}Robert},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference,
               Denver, Colorado, USA, November 29 - December 4, 1999]},
  pages     = {1057--1063},
  publisher = {The {MIT} Press},
  year      = {1999},
  url       = {http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/SuttonMSM99.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@book{DBLP:books/lib/SuttonB98,
  author       = {Richard S. Sutton and
                  Andrew G. Barto},
  title        = {Reinforcement Learning: An Introduction},
  series       = {Adaptive computation and machine learning},
  publisher    = {{MIT} Press},
  year         = {1998},
  url          = {https://www.worldcat.org/oclc/37293240},
  isbn         = {978-0-262-19398-6},
  timestamp    = {Fri, 17 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/books/lib/SuttonB98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tnn/SuttonB98,
  author       = {Richard S. Sutton and
                  Andrew G. Barto},
  title        = {Reinforcement Learning: An Introduction},
  journal      = {{IEEE} Trans. Neural Networks},
  volume       = {9},
  number       = {5},
  pages        = {1054},
  year         = {1998},
  url          = {https://doi.org/10.1109/TNN.1998.712192},
  doi          = {10.1109/TNN.1998.712192},
  timestamp    = {Sun, 28 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tnn/SuttonB98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ecml/PrecupSS98,
  author    = {Precup, Doina and Sutton, Richard S. and Singh, Satinder},
  editor    = {Nedellec, Claire and Rouveirol, C{\'{e}}line},
  title     = {Theoretical Results on Reinforcement Learning with Temporally Abstract Options},
  booktitle = {Machine Learning: ECML-98, 10th European Conference on Machine Learning,
               Chemnitz, Germany, April 21-23, 1998, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1398},
  pages     = {382--393},
  publisher = {Springer},
  year      = {1998},
  url       = {https://doi.org/10.1007/BFb0026709},
  doi       = {10.1007/BFB0026709},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ecml/PrecupSS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/SuttonPS98,
  author    = {Sutton, Richard S. and Precup, Doina and Singh, Satinder},
  editor    = {Shavlik, Jude W.},
  title     = {Intra-Option Learning about Temporally Abstract Actions},
  booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning
               {(ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages     = {556--564},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonPS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/MollBPS98,
  author = {Robert Moll and Andrew G. Barto and Theodore J. Perkins and Richard S. Sutton},
  editor = {Michael J. Kearns and Sara A. Solla and David A. Cohn},
  title = {Learning Instance-Independent Value Functions to Enhance Local Search},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages = {1017--1023},
  publisher = {The {MIT} Press},
  year = {1998},
  url = {http://papers.nips.cc/paper/1573-learning-instance-independent-value-functions-to-enhance-local-search},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/MollBPS98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonSPR98,
  author = {Richard S. Sutton and Satinder Singh and Doina Precup and Balaraman Ravindran},
  editor = {Michael J. Kearns and Sara A. Solla and David A. Cohn},
  title = {Improved Switching among Temporally Abstract Actions},
  booktitle = {Advances in Neural Information Processing Systems 11, {[NIPS} Conference, Denver, Colorado, USA, November 30 - December 5, 1998]},
  pages = {1066--1072},
  publisher = {The {MIT} Press},
  year = {1998},
  url = {http://papers.nips.cc/paper/1607-improved-switching-among-temporally-abstract-actions},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/nips/SuttonSPR98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/seal/Sutton98,
  author = {Richard S. Sutton},
  editor = {Bob McKay and Xin Yao and Charles S. Newton and Jong{-}Hwan Kim and Takeshi Furuhashi},
  title = {Reinforcement Learning: Past, Present and Future},
  booktitle = {Simulated Evolution and Learning, Second Asia-Pacific Conference on Simulated Evolution and Learning, SEAL'98, Canberra, Australia, November 24-27 1998, Selected Papers},
  series = {Lecture Notes in Computer Science},
  volume = {1585},
  pages = {195--197},
  publisher = {Springer},
  year = {1998},
  url = {https://doi.org/10.1007/3-540-48873-1\_26},
  doi = {10.1007/3-540-48873-1\_26},
  timestamp = {Tue, 14 May 2019 10:00:41 +0200},
  biburl = {https://dblp.org/rec/conf/seal/Sutton98.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/adb/SantamariaSR97,
  author = {Juan Carlos Santamar{\'{\i}}a and Richard S. Sutton and Ashwin Ram},
  title = {Experiments with Reinforcement Learning in Problems with Continuous State and Action Spaces},
  journal = {Adapt. Behav.},
  volume = {6},
  number = {2},
  pages = {163--217},
  year = {1997},
  url = {https://doi.org/10.1177/105971239700600201},
  doi = {10.1177/105971239700600201},
  timestamp = {Thu, 07 Dec 2023 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/adb/SantamariaSR97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICANN '97 conference paper (LNCS 1327). {Markov} is braced in the title so
% that sentence-casing bibliography styles keep the proper noun capitalised.
@inproceedings{DBLP:conf/icann/Sutton97,
  author       = {Richard S. Sutton},
  editor       = {Wulfram Gerstner and
                  Alain Germond and
                  Martin Hasler and
                  Jean{-}Daniel Nicoud},
  title        = {On the Significance of {Markov} Decision Processes},
  booktitle    = {Artificial Neural Networks - {ICANN} '97, 7th International Conference,
                  Lausanne, Switzerland, October 8-10, 1997, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {1327},
  pages        = {273--282},
  publisher    = {Springer},
  year         = {1997},
  url          = {https://doi.org/10.1007/BFb0020167},
  doi          = {10.1007/BFB0020167},
  timestamp    = {Tue, 14 May 2019 10:00:49 +0200},
  biburl       = {https://dblp.org/rec/conf/icann/Sutton97.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PrecupS97,
  author = {Doina Precup and Richard S. Sutton},
  editor = {Douglas H. Fisher},
  title = {Exponentiated Gradient Methods for Reinforcement Learning},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine Learning {(ICML} 1997), Nashville, Tennessee, USA, July 8-12, 1997},
  pages = {272--277},
  publisher = {Morgan Kaufmann},
  year = {1997},
  timestamp = {Wed, 04 Dec 2002 12:34:29 +0100},
  biburl = {https://dblp.org/rec/conf/icml/PrecupS97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/PrecupS97,
  author = {Doina Precup and Richard S. Sutton},
  editor = {Michael I. Jordan and Michael J. Kearns and Sara A. Solla},
  title = {Multi-time Models for Temporally Abstract Planning},
  booktitle = {Advances in Neural Information Processing Systems 10, {[NIPS} Conference, Denver, Colorado, USA, 1997]},
  pages = {1050--1056},
  publisher = {The {MIT} Press},
  year = {1997},
  url = {http://papers.nips.cc/paper/1362-multi-time-models-for-temporally-abstract-planning},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/PrecupS97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/SinghS96,
  author = {Satinder P. Singh and Richard S. Sutton},
  title = {Reinforcement Learning with Replacing Eligibility Traces},
  journal = {Mach. Learn.},
  volume = {22},
  number = {1-3},
  pages = {123--158},
  year = {1996},
  url = {https://doi.org/10.1023/A:1018012322525},
  doi = {10.1023/A:1018012322525},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/ml/SinghS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/Sutton95,
  author = {Richard S. Sutton},
  editor = {Armand Prieditis and Stuart Russell},
  title = {{TD} Models: Modeling the World at a Mixture of Time Scales},
  booktitle = {Machine Learning, Proceedings of the Twelfth International Conference on Machine Learning, Tahoe City, California, USA, July 9-12, 1995},
  pages = {531--539},
  publisher = {Morgan Kaufmann},
  year = {1995},
  url = {https://doi.org/10.1016/b978-1-55860-377-6.50072-4},
  doi = {10.1016/B978-1-55860-377-6.50072-4},
  timestamp = {Wed, 20 Apr 2022 13:29:53 +0200},
  biburl = {https://dblp.org/rec/conf/icml/Sutton95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 8 (1995) conference paper. The publisher is spelled "The {MIT} Press" to
% match the other MIT-published NIPS records in this file, and {NIPS} is braced
% in booktitle so the acronym keeps its case under any style.
@inproceedings{DBLP:conf/nips/Sutton95,
  author       = {Richard S. Sutton},
  editor       = {David S. Touretzky and
                  Michael Mozer and
                  Michael E. Hasselmo},
  title        = {Generalization in Reinforcement Learning: Successful Examples Using
                  Sparse Coarse Coding},
  booktitle    = {Advances in Neural Information Processing Systems 8, {NIPS}, Denver,
                  CO, USA, November 27-30, 1995},
  pages        = {1038--1044},
  publisher    = {The {MIT} Press},
  year         = {1995},
  url          = {http://papers.nips.cc/paper/1109-generalization-in-reinforcement-learning-successful-examples-using-sparse-coarse-coding},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/Sutton95.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonW93,
  author = {Richard S. Sutton and Steven D. Whitehead},
  editor = {Paul E. Utgoff},
  title = {Online Learning with Random Representations},
  booktitle = {Machine Learning, Proceedings of the Tenth International Conference, University of Massachusetts, Amherst, MA, USA, June 27-29, 1993},
  pages = {314--321},
  publisher = {Morgan Kaufmann},
  year = {1993},
  url = {https://doi.org/10.1016/b978-1-55860-307-3.50047-2},
  doi = {10.1016/B978-1-55860-307-3.50047-2},
  timestamp = {Mon, 24 Jun 2019 13:59:03 +0200},
  biburl = {https://dblp.org/rec/conf/icml/SuttonW93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/Sutton92,
  author = {Richard S. Sutton},
  editor = {William R. Swartout},
  title = {Adapting Bias by Gradient Descent: An Incremental Version of Delta-Bar-Delta},
  booktitle = {Proceedings of the 10th National Conference on Artificial Intelligence, San Jose, CA, USA, July 12-16, 1992},
  pages = {171--176},
  publisher = {{AAAI} Press / The {MIT} Press},
  year = {1992},
  url = {http://www.aaai.org/Library/AAAI/1992/aaai92-027.php},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/aaai/Sutton92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/sigart/Sutton91,
  author = {Richard S. Sutton},
  title = {Dyna, an Integrated Architecture for Learning, Planning, and Reacting},
  journal = {{SIGART} Bull.},
  volume = {2},
  number = {4},
  pages = {160--163},
  year = {1991},
  url = {https://doi.org/10.1145/122344.122377},
  doi = {10.1145/122344.122377},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/sigart/Sutton91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonM91,
  author = {Richard S. Sutton and Christopher J. Matheus},
  editor = {Lawrence Birnbaum and Gregg Collins},
  title = {Learning Polynomial Functions by Feature Construction},
  booktitle = {Proceedings of the Eighth International Workshop (ML91), Northwestern University, Evanston, Illinois, {USA}},
  pages = {208--212},
  publisher = {Morgan Kaufmann},
  year = {1991},
  url = {https://doi.org/10.1016/b978-1-55860-200-7.50045-3},
  doi = {10.1016/B978-1-55860-200-7.50045-3},
  timestamp = {Wed, 19 Jun 2019 17:09:09 +0200},
  biburl = {https://dblp.org/rec/conf/icml/SuttonM91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/Sutton91,
  author = {Richard S. Sutton},
  editor = {Lawrence Birnbaum and Gregg Collins},
  title = {Planning by Incremental Dynamic Programming},
  booktitle = {Proceedings of the Eighth International Workshop (ML91), Northwestern University, Evanston, Illinois, {USA}},
  pages = {353--357},
  publisher = {Morgan Kaufmann},
  year = {1991},
  url = {https://doi.org/10.1016/b978-1-55860-200-7.50073-8},
  doi = {10.1016/B978-1-55860-200-7.50073-8},
  timestamp = {Wed, 19 Jun 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/Sutton91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SangerSM91,
  author = {Terence D. Sanger and Richard S. Sutton and Christopher J. Matheus},
  editor = {John E. Moody and Stephen Jose Hanson and Richard Lippmann},
  title = {Iterative Construction of Sparse Polynomial Approximations},
  booktitle = {Advances in Neural Information Processing Systems 4, {[NIPS} Conference, Denver, Colorado, USA, December 2-5, 1991]},
  pages = {1064--1071},
  publisher = {Morgan Kaufmann},
  year = {1991},
  url = {http://papers.nips.cc/paper/538-iterative-construction-of-sparse-polynomial-approximations},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/SangerSM91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/Sutton90,
  author = {Richard S. Sutton},
  editor = {Bruce W. Porter and Raymond J. Mooney},
  title = {Integrated Architectures for Learning, Planning, and Reacting Based on Approximating Dynamic Programming},
  booktitle = {Machine Learning, Proceedings of the Seventh International Conference on Machine Learning, Austin, Texas, USA, June 21-23, 1990},
  pages = {216--224},
  publisher = {Morgan Kaufmann},
  year = {1990},
  url = {https://doi.org/10.1016/b978-1-55860-141-3.50030-4},
  doi = {10.1016/B978-1-55860-141-3.50030-4},
  timestamp = {Thu, 25 Jul 2019 17:43:05 +0200},
  biburl = {https://dblp.org/rec/conf/icml/Sutton90.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/Sutton90,
  author = {Richard S. Sutton},
  editor = {Richard Lippmann and John E. Moody and David S. Touretzky},
  title = {Integrated Modeling and Control Based on Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 3, {[NIPS} Conference, Denver, Colorado, USA, November 26-29, 1990]},
  pages = {471--478},
  publisher = {Morgan Kaufmann},
  year = {1990},
  url = {http://papers.nips.cc/paper/388-integrated-modeling-and-control-based-on-reinforcement-learning},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/Sutton90.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2 (1989) conference paper. The title spelling "Problems" agrees with the
% slug of this record's own url field.
@inproceedings{DBLP:conf/nips/BartoSW89,
  author       = {Andrew G. Barto and
                  Richard S. Sutton and
                  Christopher J. C. H. Watkins},
  editor       = {David S. Touretzky},
  title        = {Sequential Decision Problems and Neural Networks},
  booktitle    = {Advances in Neural Information Processing Systems 2, {[NIPS} Conference,
                  Denver, Colorado, USA, November 27-30, 1989]},
  pages        = {686--693},
  publisher    = {Morgan Kaufmann},
  year         = {1989},
  url          = {http://papers.nips.cc/paper/194-sequential-decision-problems-and-neural-networks},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/BartoSW89.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article in Machine Learning, volume 3, issue 1 (1988).
% NOTE(review): the issue number was added from the publisher's record, not
% from the DBLP export; confirm against the journal's table of contents.
@article{DBLP:journals/ml/Sutton88,
  author       = {Richard S. Sutton},
  title        = {Learning to Predict by the Methods of Temporal Differences},
  journal      = {Mach. Learn.},
  volume       = {3},
  number       = {1},
  pages        = {9--44},
  year         = {1988},
  url          = {https://doi.org/10.1007/BF00115009},
  doi          = {10.1007/BF00115009},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/Sutton88.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/SelfridgeSB85,
  author = {Oliver G. Selfridge and Richard S. Sutton and Andrew G. Barto},
  editor = {Aravind K. Joshi},
  title = {Training and Tracking in Robotics},
  booktitle = {Proceedings of the 9th International Joint Conference on Artificial Intelligence. Los Angeles, CA, USA, August 1985},
  pages = {670--672},
  publisher = {Morgan Kaufmann},
  year = {1985},
  url = {http://ijcai.org/Proceedings/85-1/Papers/129a.pdf},
  timestamp = {Tue, 20 Aug 2019 16:19:04 +0200},
  biburl = {https://dblp.org/rec/conf/ijcai/SelfridgeSB85.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tsmc/BartoSA83,
  author = {Andrew G. Barto and Richard S. Sutton and Charles W. Anderson},
  title = {Neuronlike adaptive elements that can solve difficult learning control problems},
  journal = {{IEEE} Trans. Syst. Man Cybern.},
  volume = {13},
  number = {5},
  pages = {834--846},
  year = {1983},
  url = {https://doi.org/10.1109/TSMC.1983.6313077},
  doi = {10.1109/TSMC.1983.6313077},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/tsmc/BartoSA83.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics