% BibTeX records: Csaba Szepesvári
%
% Source: dblp computer science bibliography (https://dblp.org), "download as .bib file" export.

% arXiv (CoRR) record. The ICML 2023 record with the same title and authors
% is DBLP:conf/icml/MeiZ0ASS23.
@article{DBLP:journals/corr/abs-2402-17235,
  author       = {Mei, Jincheng and
                  Zhong, Zixin and
                  Dai, Bo and
                  Agarwal, Alekh and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  title        = {Stochastic Gradient Succeeds for Bandits},
  journal      = {CoRR},
  volume       = {abs/2402.17235},
  year         = {2024},
  doi          = {10.48550/ARXIV.2402.17235},
  url          = {https://doi.org/10.48550/arXiv.2402.17235},
  eprinttype   = {arXiv},
  eprint       = {2402.17235},
  timestamp    = {Mon, 25 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2402-17235.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2024.
@article{DBLP:journals/corr/abs-2403-05385,
  author       = {Ayoub, Alex and
                  Wang, Kaiwen and
                  Liu, Vincent and
                  Robertson, Samuel and
                  McInerney, James and
                  Liang, Dawen and
                  Kallus, Nathan and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Switching the Loss Reduces the Cost in Batch Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2403.05385},
  year         = {2024},
  doi          = {10.48550/ARXIV.2403.05385},
  url          = {https://doi.org/10.48550/arXiv.2403.05385},
  eprinttype   = {arXiv},
  eprint       = {2403.05385},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-05385.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The NeurIPS 2023 record with the same title and authors
% is DBLP:conf/nips/KirschnerBCTS23.
@article{DBLP:journals/corr/abs-2403-10379,
  author       = {Kirschner, Johannes and
                  Bakhtiari, Seyed Alireza and
                  Chandak, Kushagra and
                  Tkachuk, Volodymyr and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Regret Minimization via Saddle Point Optimization},
  journal      = {CoRR},
  volume       = {abs/2403.10379},
  year         = {2024},
  doi          = {10.48550/ARXIV.2403.10379},
  url          = {https://doi.org/10.48550/arXiv.2403.10379},
  eprinttype   = {arXiv},
  eprint       = {2403.10379},
  timestamp    = {Mon, 08 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-10379.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2023 paper (PMLR volume 206). The arXiv record with the same title
% and authors is DBLP:journals/corr/abs-2302-04376.
@inproceedings{DBLP:conf/aistats/TkachukBKJBS23,
  author       = {Tkachuk, Volodymyr and
                  Bakhtiari, Seyed Alireza and
                  Kirschner, Johannes and
                  Jusup, Matej and
                  Bogunovic, Ilija and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Ruiz, Francisco J. R. and
                  Dy, Jennifer G. and
                  van de Meent, Jan{-}Willem},
  title        = {Efficient Planning in Combinatorial Action Spaces
                  with Applications to Cooperative Multi-Agent
                  Reinforcement Learning},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  25-27 April 2023, Palau de Congressos, Valencia, Spain},
  series       = {Proceedings of Machine Learning Research},
  volume       = {206},
  pages        = {6342--6370},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v206/tkachuk23a.html},
  timestamp    = {Mon, 19 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/TkachukBKJBS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% COLT 2023 paper (PMLR volume 195). The arXiv record with the same title is
% DBLP:journals/corr/abs-2302-12940 (it lists the authors in a different order).
@inproceedings{DBLP:conf/colt/LiuMKLWS23,
  author       = {Liu, Sihan and
                  Mahajan, Gaurav and
                  Kane, Daniel and
                  Lovett, Shachar and
                  Weisz, Gell{\'{e}}rt and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Neu, Gergely and
                  Rosasco, Lorenzo},
  title        = {Exponential Hardness of Reinforcement Learning
                  with Linear Function Approximation},
  booktitle    = {The Thirty Sixth Annual Conference on Learning Theory,
                  {COLT} 2023, 12-15 July 2023, Bangalore, India},
  series       = {Proceedings of Machine Learning Research},
  volume       = {195},
  pages        = {1588--1617},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v195/liu23b.html},
  timestamp    = {Tue, 09 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/colt/LiuMKLWS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2023 paper (OpenReview).
% {Markov} is brace-protected so sentence-casing styles keep the proper noun capitalised.
@inproceedings{DBLP:conf/iclr/Zheng0QFYSW23,
  author       = {Zheng, Sirui and
                  Wang, Lingxiao and
                  Qiu, Shuang and
                  Fu, Zuyue and
                  Yang, Zhuoran and
                  Szepesv{\'{a}}ri, Csaba and
                  Wang, Zhaoran},
  title        = {Optimistic Exploration with Learned Features Provably Solves
                  {Markov} Decision Processes with Neural Dynamics},
  booktitle    = {The Eleventh International Conference on Learning Representations,
                  {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher    = {OpenReview.net},
  year         = {2023},
  url          = {https://openreview.net/pdf?id=9kBCMNb5mc},
  timestamp    = {Wed, 27 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/iclr/Zheng0QFYSW23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR volume 202). The arXiv record with the same title and
% authors is DBLP:journals/corr/abs-2307-13332.
@inproceedings{DBLP:conf/icml/Amortila0S23,
  author       = {Amortila, Philip and
                  Jiang, Nan and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Krause, Andreas and
                  Brunskill, Emma and
                  Cho, Kyunghyun and
                  Engelhardt, Barbara and
                  Sabato, Sivan and
                  Scarlett, Jonathan},
  title        = {The Optimal Approximation Factors in Misspecified
                  Off-Policy Value Function Estimation},
  booktitle    = {International Conference on Machine Learning, {ICML} 2023,
                  23-29 July 2023, Honolulu, Hawaii, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {202},
  pages        = {768--790},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v202/amortila23a.html},
  timestamp    = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/Amortila0S23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR volume 202). The arXiv record with the same title and
% authors is DBLP:journals/corr/abs-2305-13185.
% {MDPs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@inproceedings{DBLP:conf/icml/KitamuraKTVVYMM23,
  author       = {Kitamura, Toshinori and
                  Kozuno, Tadashi and
                  Tang, Yunhao and
                  Vieillard, Nino and
                  Valko, Michal and
                  Yang, Wenhao and
                  Mei, Jincheng and
                  M{\'{e}}nard, Pierre and
                  Azar, Mohammad Gheshlaghi and
                  Munos, R{\'{e}}mi and
                  Pietquin, Olivier and
                  Geist, Matthieu and
                  Szepesv{\'{a}}ri, Csaba and
                  Kumagai, Wataru and
                  Matsuo, Yutaka},
  editor       = {Krause, Andreas and
                  Brunskill, Emma and
                  Cho, Kyunghyun and
                  Engelhardt, Barbara and
                  Sabato, Sivan and
                  Scarlett, Jonathan},
  title        = {Regularization and Variance-Weighted Regression Achieves
                  Minimax Optimality in Linear {MDPs}: Theory and Practice},
  booktitle    = {International Conference on Machine Learning, {ICML} 2023,
                  23-29 July 2023, Honolulu, Hawaii, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {202},
  pages        = {17135--17175},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v202/kitamura23a.html},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/KitamuraKTVVYMM23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR volume 202). The arXiv record with the same title and
% authors is DBLP:journals/corr/abs-2402-17235.
@inproceedings{DBLP:conf/icml/MeiZ0ASS23,
  author       = {Mei, Jincheng and
                  Zhong, Zixin and
                  Dai, Bo and
                  Agarwal, Alekh and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  editor       = {Krause, Andreas and
                  Brunskill, Emma and
                  Cho, Kyunghyun and
                  Engelhardt, Barbara and
                  Sabato, Sivan and
                  Scarlett, Jonathan},
  title        = {Stochastic Gradient Succeeds for Bandits},
  booktitle    = {International Conference on Machine Learning, {ICML} 2023,
                  23-29 July 2023, Honolulu, Hawaii, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {202},
  pages        = {24325--24360},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v202/mei23a.html},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/MeiZ0ASS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2023 paper (PMLR volume 202).
@inproceedings{DBLP:conf/icml/ZhaoSSJ23,
  author       = {Zhao, Yao and
                  Stephens, Connor and
                  Szepesv{\'{a}}ri, Csaba and
                  Jun, Kwang{-}Sung},
  editor       = {Krause, Andreas and
                  Brunskill, Emma and
                  Cho, Kyunghyun and
                  Engelhardt, Barbara and
                  Sabato, Sivan and
                  Scarlett, Jonathan},
  title        = {Revisiting Simple Regret: Fast Rates for Returning a Good Arm},
  booktitle    = {International Conference on Machine Learning, {ICML} 2023,
                  23-29 July 2023, Honolulu, Hawaii, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {202},
  pages        = {42110--42158},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v202/zhao23g.html},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/ZhaoSSJ23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper. The arXiv record with the same title and authors is
% DBLP:journals/corr/abs-2403-10379.
% The url is stored raw (plain "_", no LaTeX escape): url fields are read
% verbatim by url-aware tooling, where "\_" would corrupt the link.
@inproceedings{DBLP:conf/nips/KirschnerBCTS23,
  author       = {Kirschner, Johannes and
                  Bakhtiari, Seyed Alireza and
                  Chandak, Kushagra and
                  Tkachuk, Volodymyr and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Regret Minimization via Saddle Point Optimization},
  booktitle    = {Advances in Neural Information Processing Systems 36:
                  Annual Conference on Neural Information Processing Systems 2023,
                  NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/6eaf8c729af4fbeb18006dc2e6a41d9b-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/KirschnerBCTS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper. The arXiv record with the same title and authors is
% DBLP:journals/corr/abs-2306-13053.
% The url is stored raw (plain "_", no LaTeX escape): url fields are read
% verbatim by url-aware tooling, where "\_" would corrupt the link.
@inproceedings{DBLP:conf/nips/LeeLAJLS23,
  author       = {Lee, Chung{-}Wei and
                  Liu, Qinghua and
                  Abbasi{-}Yadkori, Yasin and
                  Jin, Chi and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Context-lumpable stochastic bandits},
  booktitle    = {Advances in Neural Information Processing Systems 36:
                  Annual Conference on Neural Information Processing Systems 2023,
                  NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/f564a952c1b86684baf7d7241ae27ac8-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/LeeLAJLS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper. The arXiv record with the same title and authors is
% DBLP:journals/corr/abs-2305-11032.
% The url is stored raw (plain "_", no LaTeX escape): url fields are read
% verbatim by url-aware tooling, where "\_" would corrupt the link.
@inproceedings{DBLP:conf/nips/LiuW00S23,
  author       = {Liu, Qinghua and
                  Weisz, Gell{\'{e}}rt and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Jin, Chi and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Optimistic Natural Policy Gradient: a Simple Efficient
                  Policy Optimization Framework for Online {RL}},
  booktitle    = {Advances in Neural Information Processing Systems 36:
                  Annual Conference on Neural Information Processing Systems 2023,
                  NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/0b13c22ca208bc08f3fd13793292f25f-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/LiuW00S23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper.
% The url is stored raw (plain "_", no LaTeX escape): url fields are read
% verbatim by url-aware tooling, where "\_" would corrupt the link.
@inproceedings{DBLP:conf/nips/Mei0AGSS23,
  author       = {Mei, Jincheng and
                  Dai, Bo and
                  Agarwal, Alekh and
                  Ghavamzadeh, Mohammad and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Ordering-based Conditions for Global Convergence
                  of Policy Gradient Methods},
  booktitle    = {Advances in Neural Information Processing Systems 36:
                  Annual Conference on Neural Information Processing Systems 2023,
                  NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/61c00c07e6d27285e4b952e96cc65666-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Mei0AGSS23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper. The arXiv record with the same title and authors is
% DBLP:journals/corr/abs-2310-07811.
% Title: the q^pi symbol is written as one braced math group, and {MDPs} is
% brace-protected so sentence-casing styles do not lowercase the acronym.
% The url is stored raw (plain "_", no LaTeX escape): url fields are read
% verbatim by url-aware tooling, where "\_" would corrupt the link.
@inproceedings{DBLP:conf/nips/Weisz0S23,
  author       = {Weisz, Gell{\'{e}}rt and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Oh, Alice and
                  Naumann, Tristan and
                  Globerson, Amir and
                  Saenko, Kate and
                  Hardt, Moritz and
                  Levine, Sergey},
  title        = {Online {RL} in Linearly {$q^{\pi}$}-Realizable {MDPs}
                  Is as Easy as in Linear {MDPs} If You Learn What to Ignore},
  booktitle    = {Advances in Neural Information Processing Systems 36:
                  Annual Conference on Neural Information Processing Systems 2023,
                  NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/b973a107336177a274069cefb011244c-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Weisz0S23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% STOC 2023 paper (ACM).
@inproceedings{DBLP:conf/stoc/LiuNS023,
  author       = {Liu, Qinghua and
                  Netrapalli, Praneeth and
                  Szepesv{\'{a}}ri, Csaba and
                  Jin, Chi},
  editor       = {Saha, Barna and
                  Servedio, Rocco A.},
  title        = {Optimistic {MLE:} {A} Generic Model-Based Algorithm
                  for Partially Observable Sequential Decision Making},
  booktitle    = {Proceedings of the 55th Annual {ACM} Symposium on Theory of Computing,
                  {STOC} 2023, Orlando, FL, USA, June 20-23, 2023},
  pages        = {363--376},
  publisher    = {{ACM}},
  year         = {2023},
  doi          = {10.1145/3564246.3585161},
  url          = {https://doi.org/10.1145/3564246.3585161},
  timestamp    = {Mon, 22 May 2023 13:01:48 +0200},
  biburl       = {https://dblp.org/rec/conf/stoc/LiuNS023.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2023.
@article{DBLP:journals/corr/abs-2301-06276,
  author       = {Mei, Jincheng and
                  Chung, Wesley and
                  Thomas, Valentin and
                  Dai, Bo and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  title        = {The Role of Baselines in Policy Gradient Optimization},
  journal      = {CoRR},
  volume       = {abs/2301.06276},
  year         = {2023},
  doi          = {10.48550/ARXIV.2301.06276},
  url          = {https://doi.org/10.48550/arXiv.2301.06276},
  eprinttype   = {arXiv},
  eprint       = {2301.06276},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-06276.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2023.
@article{DBLP:journals/corr/abs-2301-12579,
  author       = {Yin, Dong and
                  Thiagarajan, Sridhar and
                  Lazic, Nevena and
                  Rajaraman, Nived and
                  Hao, Botao and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Sample Efficient Deep Reinforcement Learning via Local Planning},
  journal      = {CoRR},
  volume       = {abs/2301.12579},
  year         = {2023},
  doi          = {10.48550/ARXIV.2301.12579},
  url          = {https://doi.org/10.48550/arXiv.2301.12579},
  eprinttype   = {arXiv},
  eprint       = {2301.12579},
  timestamp    = {Wed, 01 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-12579.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The AISTATS 2023 record with the same title and authors
% is DBLP:conf/aistats/TkachukBKJBS23.
@article{DBLP:journals/corr/abs-2302-04376,
  author       = {Tkachuk, Volodymyr and
                  Bakhtiari, Seyed Alireza and
                  Kirschner, Johannes and
                  Jusup, Matej and
                  Bogunovic, Ilija and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Efficient Planning in Combinatorial Action Spaces
                  with Applications to Cooperative Multi-Agent
                  Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2302.04376},
  year         = {2023},
  doi          = {10.48550/ARXIV.2302.04376},
  url          = {https://doi.org/10.48550/arXiv.2302.04376},
  eprinttype   = {arXiv},
  eprint       = {2302.04376},
  timestamp    = {Mon, 13 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2302-04376.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The COLT 2023 record with the same title is
% DBLP:conf/colt/LiuMKLWS23 (it lists the authors in a different order).
@article{DBLP:journals/corr/abs-2302-12940,
  author       = {Kane, Daniel and
                  Liu, Sihan and
                  Lovett, Shachar and
                  Mahajan, Gaurav and
                  Szepesv{\'{a}}ri, Csaba and
                  Weisz, Gell{\'{e}}rt},
  title        = {Exponential Hardness of Reinforcement Learning
                  with Linear Function Approximation},
  journal      = {CoRR},
  volume       = {abs/2302.12940},
  year         = {2023},
  doi          = {10.48550/ARXIV.2302.12940},
  url          = {https://doi.org/10.48550/arXiv.2302.12940},
  eprinttype   = {arXiv},
  eprint       = {2302.12940},
  timestamp    = {Tue, 09 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2302-12940.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The NeurIPS 2023 record with the same title and authors
% is DBLP:conf/nips/LiuW00S23.
@article{DBLP:journals/corr/abs-2305-11032,
  author       = {Liu, Qinghua and
                  Weisz, Gell{\'{e}}rt and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Jin, Chi and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Optimistic Natural Policy Gradient: a Simple Efficient
                  Policy Optimization Framework for Online {RL}},
  journal      = {CoRR},
  volume       = {abs/2305.11032},
  year         = {2023},
  doi          = {10.48550/ARXIV.2305.11032},
  url          = {https://doi.org/10.48550/arXiv.2305.11032},
  eprinttype   = {arXiv},
  eprint       = {2305.11032},
  timestamp    = {Thu, 25 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-11032.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The ICML 2023 record with the same title and authors
% is DBLP:conf/icml/KitamuraKTVVYMM23.
% {MDPs} is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2305-13185,
  author       = {Kitamura, Toshinori and
                  Kozuno, Tadashi and
                  Tang, Yunhao and
                  Vieillard, Nino and
                  Valko, Michal and
                  Yang, Wenhao and
                  Mei, Jincheng and
                  M{\'{e}}nard, Pierre and
                  Azar, Mohammad Gheshlaghi and
                  Munos, R{\'{e}}mi and
                  Pietquin, Olivier and
                  Geist, Matthieu and
                  Szepesv{\'{a}}ri, Csaba and
                  Kumagai, Wataru and
                  Matsuo, Yutaka},
  title        = {Regularization and Variance-Weighted Regression Achieves
                  Minimax Optimality in Linear {MDPs}: Theory and Practice},
  journal      = {CoRR},
  volume       = {abs/2305.13185},
  year         = {2023},
  doi          = {10.48550/ARXIV.2305.13185},
  url          = {https://doi.org/10.48550/arXiv.2305.13185},
  eprinttype   = {arXiv},
  eprint       = {2305.13185},
  timestamp    = {Fri, 26 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-13185.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The NeurIPS 2023 record with the same title and authors
% is DBLP:conf/nips/LeeLAJLS23.
@article{DBLP:journals/corr/abs-2306-13053,
  author       = {Lee, Chung{-}Wei and
                  Liu, Qinghua and
                  Abbasi{-}Yadkori, Yasin and
                  Jin, Chi and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Context-lumpable stochastic bandits},
  journal      = {CoRR},
  volume       = {abs/2306.13053},
  year         = {2023},
  doi          = {10.48550/ARXIV.2306.13053},
  url          = {https://doi.org/10.48550/arXiv.2306.13053},
  eprinttype   = {arXiv},
  eprint       = {2306.13053},
  timestamp    = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-13053.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The ICML 2023 record with the same title and authors
% is DBLP:conf/icml/Amortila0S23.
@article{DBLP:journals/corr/abs-2307-13332,
  author       = {Amortila, Philip and
                  Jiang, Nan and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {The Optimal Approximation Factors in Misspecified
                  Off-Policy Value Function Estimation},
  journal      = {CoRR},
  volume       = {abs/2307.13332},
  year         = {2023},
  doi          = {10.48550/ARXIV.2307.13332},
  url          = {https://doi.org/10.48550/arXiv.2307.13332},
  eprinttype   = {arXiv},
  eprint       = {2307.13332},
  timestamp    = {Tue, 01 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2307-13332.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. The NeurIPS 2023 record with the same title and authors
% is DBLP:conf/nips/Weisz0S23.
% Title: the q^pi symbol is written as one braced math group, and {MDPs} is
% brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2310-07811,
  author       = {Weisz, Gell{\'{e}}rt and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Online {RL} in Linearly {$q^{\pi}$}-Realizable {MDPs}
                  Is as Easy as in Linear {MDPs} If You Learn What to Ignore},
  journal      = {CoRR},
  volume       = {abs/2310.07811},
  year         = {2023},
  doi          = {10.48550/ARXIV.2310.07811},
  url          = {https://doi.org/10.48550/arXiv.2310.07811},
  eprinttype   = {arXiv},
  eprint       = {2310.07811},
  timestamp    = {Tue, 24 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-07811.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2023.
% {Gaussian} is brace-protected so sentence-casing styles keep the proper adjective capitalised.
@article{DBLP:journals/corr/abs-2310-20581,
  author       = {Lin, Jihao Andreas and
                  Padhy, Shreyas and
                  Antor{\'{a}}n, Javier and
                  Tripp, Austin and
                  Terenin, Alexander and
                  Szepesv{\'{a}}ri, Csaba and
                  Hern{\'{a}}ndez{-}Lobato, Jos{\'{e}} Miguel and
                  Janz, David},
  title        = {Stochastic Gradient Descent for {Gaussian} Processes Done Right},
  journal      = {CoRR},
  volume       = {abs/2310.20581},
  year         = {2023},
  doi          = {10.48550/ARXIV.2310.20581},
  url          = {https://doi.org/10.48550/arXiv.2310.20581},
  eprinttype   = {arXiv},
  eprint       = {2310.20581},
  timestamp    = {Fri, 03 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-20581.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2023.
@article{DBLP:journals/corr/abs-2311-07565,
  author       = {Janz, David and
                  Liu, Shuai and
                  Ayoub, Alex and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Exploration via linearly perturbed loss minimisation},
  journal      = {CoRR},
  volume       = {abs/2311.07565},
  year         = {2023},
  doi          = {10.48550/ARXIV.2311.07565},
  url          = {https://doi.org/10.48550/arXiv.2311.07565},
  eprinttype   = {arXiv},
  eprint       = {2311.07565},
  timestamp    = {Wed, 15 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2311-07565.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record, 2023.
@article{DBLP:journals/corr/abs-2311-08376,
  author       = {Janz, David and
                  Litvak, Alexander E. and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Ensemble sampling for linear bandits: small ensembles suffice},
  journal      = {CoRR},
  volume       = {abs/2311.08376},
  year         = {2023},
  doi          = {10.48550/ARXIV.2311.08376},
  url          = {https://doi.org/10.48550/arXiv.2311.08376},
  eprinttype   = {arXiv},
  eprint       = {2311.08376},
  timestamp    = {Thu, 23 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2311-08376.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2022 paper (PMLR volume 151).
@inproceedings{DBLP:conf/aistats/HaoLYAS22,
  author       = {Hao, Botao and
                  Lazic, Nevena and
                  Yin, Dong and
                  Abbasi{-}Yadkori, Yasin and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Camps{-}Valls, Gustau and
                  Ruiz, Francisco J. R. and
                  Valera, Isabel},
  title        = {Confident Least Square Value Iteration with Local Access to a Simulator},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2022, 28-30 March 2022, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {151},
  pages        = {2420--2435},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v151/hao22a.html},
  timestamp    = {Sat, 30 Sep 2023 09:34:08 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/HaoLYAS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2022 paper (PMLR volume 151).
@inproceedings{DBLP:conf/aistats/RajJ0S22,
  author       = {Raj, Anant and
                  Joulani, Pooria and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Camps{-}Valls, Gustau and
                  Ruiz, Francisco J. R. and
                  Valera, Isabel},
  title        = {Faster Rates, Adaptive Algorithms, and Finite-Time Bounds
                  for Linear Composition Optimization and Gradient {TD} Learning},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2022, 28-30 March 2022, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {151},
  pages        = {7176--7186},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v151/raj22a.html},
  timestamp    = {Fri, 20 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/RajJ0S22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2022 paper (PMLR volume 151).
@inproceedings{DBLP:conf/aistats/XiaoLDSS22,
  author       = {Xiao, Chenjun and
                  Lee, Ilbin and
                  Dai, Bo and
                  Schuurmans, Dale and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Camps{-}Valls, Gustau and
                  Ruiz, Francisco J. R. and
                  Valera, Isabel},
  title        = {The Curse of Passive Data Collection in Batch Reinforcement Learning},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2022, 28-30 March 2022, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {151},
  pages        = {8413--8438},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v151/xiao22b.html},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/XiaoLDSS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ALT 2022 paper (PMLR volume 167).
% {TensorPlan} and {MDPs} are brace-protected so sentence-casing styles keep
% the camelCase name and the acronym intact.
@inproceedings{DBLP:conf/alt/WeiszS022,
  author       = {Weisz, Gell{\'{e}}rt and
                  Szepesv{\'{a}}ri, Csaba and
                  Gy{\"{o}}rgy, Andr{\'{a}}s},
  editor       = {Dasgupta, Sanjoy and
                  Haghtalab, Nika},
  title        = {{TensorPlan} and the Few Actions Lower Bound for Planning in {MDPs}
                  under Linear Realizability of Optimal Value Functions},
  booktitle    = {International Conference on Algorithmic Learning Theory,
                  29 March - 1 April 2022, Paris, France},
  series       = {Proceedings of Machine Learning Research},
  volume       = {167},
  pages        = {1097--1137},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v167/weisz22a.html},
  timestamp    = {Fri, 10 Mar 2023 11:41:45 +0100},
  biburl       = {https://dblp.org/rec/conf/alt/WeiszS022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ALT 2022 paper (PMLR volume 167).
@inproceedings{DBLP:conf/alt/YinHALS22,
  author       = {Yin, Dong and
                  Hao, Botao and
                  Abbasi{-}Yadkori, Yasin and
                  Lazic, Nevena and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Dasgupta, Sanjoy and
                  Haghtalab, Nika},
  title        = {Efficient local planning with linear function approximation},
  booktitle    = {International Conference on Algorithmic Learning Theory,
                  29 March - 1 April 2022, Paris, France},
  series       = {Proceedings of Machine Learning Research},
  volume       = {167},
  pages        = {1165--1192},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v167/yin22a.html},
  timestamp    = {Fri, 25 Mar 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/alt/YinHALS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% COLT 2022 paper (PMLR volume 178); same title and authors as the CoRR entry
% DBLP:journals/corr/abs-2204-08967.
@inproceedings{DBLP:conf/colt/LiuCSJ22,
  author    = {Liu, Qinghua and
               Chung, Alan and
               Szepesv{\'{a}}ri, Csaba and
               Jin, Chi},
  editor    = {Loh, Po{-}Ling and
               Raginsky, Maxim},
  title     = {When Is Partially Observable Reinforcement Learning Not Scary?},
  booktitle = {Conference on Learning Theory, 2-5 July 2022, London, {UK}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {178},
  pages     = {5175--5220},
  publisher = {{PMLR}},
  year      = {2022},
  url       = {https://proceedings.mlr.press/v178/liu22f.html},
  timestamp = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/colt/LiuCSJ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2022 paper; same title and authors as the CoRR entry
% DBLP:journals/corr/abs-2206-01315. Editor given names are spelled out, Markov is
% brace-protected in the title, and the URL is stored raw (no escaped underscore).
@inproceedings{DBLP:conf/nips/LiuS022,
  author       = {Qinghua Liu and
                  Csaba Szepesv{\'{a}}ri and
                  Chi Jin},
  editor       = {Sanmi Koyejo and
                  Shakir Mohamed and
                  Alekh Agarwal and
                  Danielle Belgrave and
                  Kyunghyun Cho and
                  Alice Oh},
  title        = {Sample-Efficient Reinforcement Learning of Partially Observable {Markov}
                  Games},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/743459dae9b2c5d2904e5432d5298128-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/LiuS022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper. Editor given names are spelled out instead of initials, and the
% URL is stored raw (no escaped underscore) so url-aware styles produce a working link.
@inproceedings{DBLP:conf/nips/MeiCTDSS22,
  author       = {Jincheng Mei and
                  Wesley Chung and
                  Valentin Thomas and
                  Bo Dai and
                  Csaba Szepesv{\'{a}}ri and
                  Dale Schuurmans},
  editor       = {Sanmi Koyejo and
                  Shakir Mohamed and
                  Alekh Agarwal and
                  Danielle Belgrave and
                  Kyunghyun Cho and
                  Alice Oh},
  title        = {The Role of Baselines in Policy Gradient Optimization},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/718d02a76d69686a36eccc8cde3e6a41-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/MeiCTDSS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper; same title as the CoRR entry DBLP:journals/corr/abs-2206-06270.
% The second author is spelled Lin F. Yang to agree with that entry, editor given names
% are spelled out, MDPs is brace-protected, and the URL is stored raw.
@inproceedings{DBLP:conf/nips/Vaswani0S22,
  author       = {Sharan Vaswani and
                  Lin F. Yang and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Sanmi Koyejo and
                  Shakir Mohamed and
                  Alekh Agarwal and
                  Danielle Belgrave and
                  Kyunghyun Cho and
                  Alice Oh},
  title        = {Near-Optimal Sample Complexity Bounds for Constrained {MDPs}},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/14a5ebc9cd2e507cd811df78c15bf5d7-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Vaswani0S22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper; same authors as the CoRR entry DBLP:journals/corr/abs-2210-15755.
% The title's inline math had been escaped into literal text (dollar signs, caret and
% backslash printed verbatim); it is restored as braced math. MDPs is brace-protected,
% editor given names are spelled out, and the URL is stored raw.
@inproceedings{DBLP:conf/nips/Weisz0KS22,
  author       = {Gell{\'{e}}rt Weisz and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Tadashi Kozuno and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Sanmi Koyejo and
                  Shakir Mohamed and
                  Alekh Agarwal and
                  Danielle Belgrave and
                  Kyunghyun Cho and
                  Alice Oh},
  title        = {Confident Approximate Policy Iteration for Efficient Local Planning
                  in {$q^\pi$}-realizable {MDPs}},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/a3bfb116214815682a0d0d88ea95cd12-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Weisz0KS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2022 paper; same title and authors as the CoRR entry
% DBLP:journals/corr/abs-2206-02092. Thompson (proper noun) is brace-protected, editor
% given names are spelled out, and the URL is stored raw (no escaped underscore).
@inproceedings{DBLP:conf/nips/YuanNWZCSW22,
  author       = {Hui Yuan and
                  Chengzhuo Ni and
                  Huazheng Wang and
                  Xuezhou Zhang and
                  Le Cong and
                  Csaba Szepesv{\'{a}}ri and
                  Mengdi Wang},
  editor       = {Sanmi Koyejo and
                  Shakir Mohamed and
                  Alekh Agarwal and
                  Danielle Belgrave and
                  Kyunghyun Cho and
                  Alice Oh},
  title        = {Bandit Theory and {Thompson} Sampling-Guided Directed Evolution for
                  Sequence Optimization},
  booktitle    = {Advances in Neural Information Processing Systems 35: Annual Conference
                  on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans,
                  LA, USA, November 28 - December 9, 2022},
  year         = {2022},
  url          = {http://papers.nips.cc/paper_files/paper/2022/hash/fa3c139cf8084de7bfd944f1c90c8695-Abstract-Conference.html},
  timestamp    = {Mon, 08 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/YuanNWZCSW22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2022 paper (PMLR volume 180). The title is stored in Title Case; bibliography
% styles can downcase it but cannot restore capitals that were never entered.
@inproceedings{DBLP:conf/uai/RenZSD22,
  author       = {Tongzheng Ren and
                  Tianjun Zhang and
                  Csaba Szepesv{\'{a}}ri and
                  Bo Dai},
  editor       = {James Cussens and
                  Kun Zhang},
  title        = {A Free Lunch from the Noise: Provable and Practical Exploration for
                  Representation Learning},
  booktitle    = {Uncertainty in Artificial Intelligence, Proceedings of the Thirty-Eighth
                  Conference on Uncertainty in Artificial Intelligence, {UAI} 2022,
                  1-5 August 2022, Eindhoven, The Netherlands},
  series       = {Proceedings of Machine Learning Research},
  volume       = {180},
  pages        = {1686--1696},
  publisher    = {{PMLR}},
  year         = {2022},
  url          = {https://proceedings.mlr.press/v180/ren22a.html},
  timestamp    = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/RenZSD22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Whole-proceedings record for ICML 2022 (PMLR volume 162).
@proceedings{DBLP:conf/icml/2022,
  editor    = {Chaudhuri, Kamalika and
               Jegelka, Stefanie and
               Song, Le and
               Szepesv{\'{a}}ri, Csaba and
               Niu, Gang and
               Sabato, Sivan},
  title     = {International Conference on Machine Learning, {ICML} 2022, 17-23 July
               2022, Baltimore, Maryland, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {162},
  publisher = {{PMLR}},
  year      = {2022},
  url       = {http://proceedings.mlr.press/v162/},
  timestamp = {Tue, 12 Jul 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/2022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2204.05176) preprint. MDPs is brace-protected in the title so that
% sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2204-05176,
  author       = {Arushi Jain and
                  Sharan Vaswani and
                  Reza Babanezhad and
                  Csaba Szepesv{\'{a}}ri and
                  Doina Precup},
  title        = {Towards Painless Policy Optimization for Constrained {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2204.05176},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2204.05176},
  doi          = {10.48550/ARXIV.2204.05176},
  eprinttype   = {arXiv},
  eprint       = {2204.05176},
  timestamp    = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-05176.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2204.08967) preprint; same title and authors as DBLP:conf/colt/LiuCSJ22.
@article{DBLP:journals/corr/abs-2204-08967,
  author     = {Liu, Qinghua and
                Chung, Alan and
                Szepesv{\'{a}}ri, Csaba and
                Jin, Chi},
  title      = {When Is Partially Observable Reinforcement Learning Not Scary?},
  journal    = {CoRR},
  volume     = {abs/2204.08967},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.08967},
  doi        = {10.48550/ARXIV.2204.08967},
  eprinttype = {arXiv},
  eprint     = {2204.08967},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-08967.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2205.14211) preprint. KL is brace-protected in the title; without the
% braces a sentence-casing style would render the leading acronym as "Kl".
@article{DBLP:journals/corr/abs-2205-14211,
  author       = {Tadashi Kozuno and
                  Wenhao Yang and
                  Nino Vieillard and
                  Toshinori Kitamura and
                  Yunhao Tang and
                  Jincheng Mei and
                  Pierre M{\'{e}}nard and
                  Mohammad Gheshlaghi Azar and
                  Michal Valko and
                  R{\'{e}}mi Munos and
                  Olivier Pietquin and
                  Matthieu Geist and
                  Csaba Szepesv{\'{a}}ri},
  title        = {{KL}-Entropy-Regularized {RL} with a Generative Model is Minimax Optimal},
  journal      = {CoRR},
  volume       = {abs/2205.14211},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2205.14211},
  doi          = {10.48550/ARXIV.2205.14211},
  eprinttype   = {arXiv},
  eprint       = {2205.14211},
  timestamp    = {Wed, 01 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2205-14211.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2206.01315) preprint; same title and authors as DBLP:conf/nips/LiuS022.
% Markov (proper noun) is brace-protected so sentence-casing styles keep the capital.
@article{DBLP:journals/corr/abs-2206-01315,
  author       = {Qinghua Liu and
                  Csaba Szepesv{\'{a}}ri and
                  Chi Jin},
  title        = {Sample-Efficient Reinforcement Learning of Partially Observable {Markov}
                  Games},
  journal      = {CoRR},
  volume       = {abs/2206.01315},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.01315},
  doi          = {10.48550/ARXIV.2206.01315},
  eprinttype   = {arXiv},
  eprint       = {2206.01315},
  timestamp    = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-01315.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2206.02092) preprint; same title and authors as
% DBLP:conf/nips/YuanNWZCSW22. Thompson (proper noun) is brace-protected in the title.
@article{DBLP:journals/corr/abs-2206-02092,
  author       = {Hui Yuan and
                  Chengzhuo Ni and
                  Huazheng Wang and
                  Xuezhou Zhang and
                  Le Cong and
                  Csaba Szepesv{\'{a}}ri and
                  Mengdi Wang},
  title        = {Bandit Theory and {Thompson} Sampling-Guided Directed Evolution for
                  Sequence Optimization},
  journal      = {CoRR},
  volume       = {abs/2206.02092},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.02092},
  doi          = {10.48550/ARXIV.2206.02092},
  eprinttype   = {arXiv},
  eprint       = {2206.02092},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-02092.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2206.06270) preprint; same title as DBLP:conf/nips/Vaswani0S22.
% MDPs is brace-protected so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2206-06270,
  author       = {Sharan Vaswani and
                  Lin F. Yang and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Near-Optimal Sample Complexity Bounds for Constrained {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2206.06270},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.06270},
  doi          = {10.48550/ARXIV.2206.06270},
  eprinttype   = {arXiv},
  eprint       = {2206.06270},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-06270.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2209.14997) preprint.
@article{DBLP:journals/corr/abs-2209-14997,
  author     = {Liu, Qinghua and
                Netrapalli, Praneeth and
                Szepesv{\'{a}}ri, Csaba and
                Jin, Chi},
  title      = {Optimistic {MLE} - {A} Generic Model-based Algorithm for Partially
                Observable Sequential Decision Making},
  journal    = {CoRR},
  volume     = {abs/2209.14997},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.14997},
  doi        = {10.48550/ARXIV.2209.14997},
  eprinttype = {arXiv},
  eprint     = {2209.14997},
  timestamp  = {Wed, 07 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-14997.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2210.15755) preprint; same authors as DBLP:conf/nips/Weisz0KS22.
% The title's superscript is written as plain braced inline math instead of a nested
% mbox construction, and MDPs is brace-protected against sentence-casing styles.
@article{DBLP:journals/corr/abs-2210-15755,
  author       = {Gell{\'{e}}rt Weisz and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Tadashi Kozuno and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Confident Approximate Policy Iteration for Efficient Local Planning
                  in {$q^\pi$}-realizable {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2210.15755},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.15755},
  doi          = {10.48550/ARXIV.2210.15755},
  eprinttype   = {arXiv},
  eprint       = {2210.15755},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-15755.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv 2210.16913) preprint.
@article{DBLP:journals/corr/abs-2210-16913,
  author     = {Zhao, Yao and
                Stephens, Connor and
                Szepesv{\'{a}}ri, Csaba and
                Jun, Kwang{-}Sung},
  title      = {Revisiting Simple Regret Minimization in Multi-Armed Bandits},
  journal    = {CoRR},
  volume     = {abs/2210.16913},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.16913},
  doi        = {10.48550/ARXIV.2210.16913},
  eprinttype = {arXiv},
  eprint     = {2210.16913},
  timestamp  = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-16913.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% CoRR (arXiv 2212.13848) preprint. Lipschitz (proper noun), GD and ReLU are
% brace-protected in the title so sentence-casing styles keep their capitalisation.
@article{DBLP:journals/corr/abs-2212-13848,
  author       = {Ilja Kuzborskij and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Learning {Lipschitz} Functions by {GD}-trained Shallow Overparameterized
                  {ReLU} Neural Networks},
  journal      = {CoRR},
  volume       = {abs/2212.13848},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.13848},
  doi          = {10.48550/ARXIV.2212.13848},
  eprinttype   = {arXiv},
  eprint       = {2212.13848},
  timestamp    = {Thu, 05 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-13848.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, J. Mach. Learn. Res. volume 22 (2021).
@article{DBLP:journals/jmlr/Perez-OrtizRSS21,
  author    = {P{\'{e}}rez{-}Ortiz, Mar{\'{\i}}a and
               Rivasplata, Omar and
               Shawe{-}Taylor, John and
               Szepesv{\'{a}}ri, Csaba},
  title     = {Tighter Risk Certificates for Neural Networks},
  journal   = {J. Mach. Learn. Res.},
  volume    = {22},
  pages     = {227:1--227:40},
  year      = {2021},
  url       = {http://jmlr.org/papers/v22/20-879.html},
  timestamp = {Mon, 04 Jul 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/Perez-OrtizRSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Guest editorial in Mach. Learn. volume 110, number 9 (2021). The title is stored in
% Title Case; styles can downcase it but cannot restore capitals that were never entered.
@article{DBLP:journals/ml/LiGLSW21,
  author       = {Yuxi Li and
                  Alborz Geramifard and
                  Lihong Li and
                  Csaba Szepesv{\'{a}}ri and
                  Tao Wang},
  title        = {Guest Editorial: Special Issue on Reinforcement Learning for Real
                  Life},
  journal      = {Mach. Learn.},
  volume       = {110},
  number       = {9},
  pages        = {2291--2293},
  year         = {2021},
  url          = {https://doi.org/10.1007/s10994-021-06041-3},
  doi          = {10.1007/S10994-021-06041-3},
  timestamp    = {Tue, 30 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/LiGLSW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2021 paper (PMLR volume 130).
@inproceedings{DBLP:conf/aistats/HaoLSW21,
  author    = {Hao, Botao and
               Lattimore, Tor and
               Szepesv{\'{a}}ri, Csaba and
               Wang, Mengdi},
  editor    = {Banerjee, Arindam and
               Fukumizu, Kenji},
  title     = {Online Sparse Reinforcement Learning},
  booktitle = {The 24th International Conference on Artificial Intelligence and Statistics,
               {AISTATS} 2021, April 13-15, 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {130},
  pages     = {316--324},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v130/hao21a.html},
  timestamp = {Wed, 14 Apr 2021 16:51:37 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/HaoLSW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% AISTATS 2021 paper (PMLR volume 130).
@inproceedings{DBLP:conf/aistats/HaoLAJS21,
  author    = {Hao, Botao and
               Lazic, Nevena and
               Abbasi{-}Yadkori, Yasin and
               Joulani, Pooria and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Banerjee, Arindam and
               Fukumizu, Kenji},
  title     = {Adaptive Approximate Policy Iteration},
  booktitle = {The 24th International Conference on Artificial Intelligence and Statistics,
               {AISTATS} 2021, April 13-15, 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {130},
  pages     = {523--531},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v130/hao21b.html},
  timestamp = {Wed, 14 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/HaoLAJS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% AISTATS 2021 paper (PMLR volume 130).
@inproceedings{DBLP:conf/aistats/KuzborskijV0S21,
  author    = {Kuzborskij, Ilja and
               Vernade, Claire and
               Gy{\"{o}}rgy, Andr{\'{a}}s and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Banerjee, Arindam and
               Fukumizu, Kenji},
  title     = {Confident Off-Policy Evaluation and Selection through Self-Normalized
               Importance Weighting},
  booktitle = {The 24th International Conference on Artificial Intelligence and Statistics,
               {AISTATS} 2021, April 13-15, 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {130},
  pages     = {640--648},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v130/kuzborskij21a.html},
  timestamp = {Wed, 14 Apr 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/KuzborskijV0S21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ALT 2021 paper (PMLR volume 132). MDPs is brace-protected in the title so that
% sentence-casing bibliography styles do not lowercase the acronym.
@inproceedings{DBLP:conf/alt/WeiszAS21,
  author       = {Gell{\'{e}}rt Weisz and
                  Philip Amortila and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Vitaly Feldman and
                  Katrina Ligett and
                  Sivan Sabato},
  title        = {Exponential Lower Bounds for Planning in {MDPs} With Linearly-Realizable
                  Optimal Action-Value Functions},
  booktitle    = {Algorithmic Learning Theory, 16-19 March 2021, Virtual Conference,
                  Worldwide},
  series       = {Proceedings of Machine Learning Research},
  volume       = {132},
  pages        = {1237--1264},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v132/weisz21a.html},
  timestamp    = {Fri, 26 Mar 2021 15:45:50 +0100},
  biburl       = {https://dblp.org/rec/conf/alt/WeiszAS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% COLT 2021 paper (PMLR volume 134).
@inproceedings{DBLP:conf/colt/KirschnerLVS21,
  author    = {Kirschner, Johannes and
               Lattimore, Tor and
               Vernade, Claire and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Belkin, Mikhail and
               Kpotufe, Samory},
  title     = {Asymptotically Optimal Information-Directed Sampling},
  booktitle = {Conference on Learning Theory, {COLT} 2021, 15-19 August 2021, Boulder,
               Colorado, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {134},
  pages     = {2777--2821},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v134/kirschner21a.html},
  timestamp = {Wed, 25 Aug 2021 17:11:16 +0200},
  biburl    = {https://dblp.org/rec/conf/colt/KirschnerLVS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% COLT 2021 paper (PMLR volume 134).
@inproceedings{DBLP:conf/colt/KuzborskijS21,
  author    = {Kuzborskij, Ilja and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Belkin, Mikhail and
               Kpotufe, Samory},
  title     = {Nonparametric Regression with Shallow Overparameterized Neural Networks
               Trained by {GD} with Early Stopping},
  booktitle = {Conference on Learning Theory, {COLT} 2021, 15-19 August 2021, Boulder,
               Colorado, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {134},
  pages     = {2853--2890},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v134/kuzborskij21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/colt/KuzborskijS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% COLT 2021 paper (PMLR volume 134). MDPs is brace-protected in the title so that
% sentence-casing bibliography styles do not lowercase the acronym.
@inproceedings{DBLP:conf/colt/WeiszAJAJS21,
  author       = {Gell{\'{e}}rt Weisz and
                  Philip Amortila and
                  Barnab{\'{a}}s Janzer and
                  Yasin Abbasi{-}Yadkori and
                  Nan Jiang and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Mikhail Belkin and
                  Samory Kpotufe},
  title        = {On Query-efficient Planning in {MDPs} under Linear Realizability of
                  the Optimal State-value Function},
  booktitle    = {Conference on Learning Theory, {COLT} 2021, 15-19 August 2021, Boulder,
                  Colorado, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {134},
  pages        = {4355--4385},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v134/weisz21a.html},
  timestamp    = {Tue, 19 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/colt/WeiszAJAJS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% COLT 2021 paper (PMLR volume 134). Markov (proper noun) is brace-protected in the
% title so that sentence-casing bibliography styles keep the capital.
@inproceedings{DBLP:conf/colt/ZhouGS21,
  author       = {Dongruo Zhou and
                  Quanquan Gu and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Mikhail Belkin and
                  Samory Kpotufe},
  title        = {Nearly Minimax Optimal Reinforcement Learning for Linear Mixture {Markov}
                  Decision Processes},
  booktitle    = {Conference on Learning Theory, {COLT} 2021, 15-19 August 2021, Boulder,
                  Colorado, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {134},
  pages        = {4532--4576},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v134/zhou21a.html},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/colt/ZhouGS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2021 paper (PMLR volume 139).
@inproceedings{DBLP:conf/icml/HaoDLSW21,
  author    = {Hao, Botao and
               Duan, Yaqi and
               Lattimore, Tor and
               Szepesv{\'{a}}ri, Csaba and
               Wang, Mengdi},
  editor    = {Meila, Marina and
               Zhang, Tong},
  title     = {Sparse Feature Selection Makes Batch Reinforcement Learning More Sample
               Efficient},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning,
               {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {4063--4073},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/hao21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/HaoDLSW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2021 paper (PMLR volume 139). The Q in Q-Evaluation is brace-protected so that
% sentence-casing bibliography styles do not turn it into a lowercase q.
@inproceedings{DBLP:conf/icml/HaoJDLSW21,
  author       = {Botao Hao and
                  Xiang Ji and
                  Yaqi Duan and
                  Hao Lu and
                  Csaba Szepesv{\'{a}}ri and
                  Mengdi Wang},
  editor       = {Marina Meila and
                  Tong Zhang},
  title        = {Bootstrapping Fitted {Q}-Evaluation for Off-Policy Inference},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {4074--4084},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v139/hao21b.html},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/HaoJDLSW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2021 paper (PMLR volume 139).
@inproceedings{DBLP:conf/icml/KonobeevKS21,
  author    = {Konobeev, Mikhail and
               Kuzborskij, Ilja and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Meila, Marina and
               Zhang, Tong},
  title     = {A Distribution-dependent Analysis of Meta Learning},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning,
               {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {5697--5706},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/konobeev21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/KonobeevKS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2021 paper (PMLR volume 139). Thompson (proper noun) is brace-protected in the
% title so that sentence-casing bibliography styles keep the capital.
@inproceedings{DBLP:conf/icml/KvetonKZHMBS21,
  author       = {Branislav Kveton and
                  Mikhail Konobeev and
                  Manzil Zaheer and
                  Chih{-}Wei Hsu and
                  Martin Mladenov and
                  Craig Boutilier and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Marina Meila and
                  Tong Zhang},
  title        = {Meta-{Thompson} Sampling},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning,
                  {ICML} 2021, 18-24 July 2021, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  pages        = {5884--5893},
  publisher    = {{PMLR}},
  year         = {2021},
  url          = {http://proceedings.mlr.press/v139/kveton21a.html},
  timestamp    = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/KvetonKZHMBS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2021 paper (PMLR volume 139).
@inproceedings{DBLP:conf/icml/LazicYAS21,
  author    = {Lazic, Nevena and
               Yin, Dong and
               Abbasi{-}Yadkori, Yasin and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Meila, Marina and
               Zhang, Tong},
  title     = {Improved Regret Bound and Experience Replay in Regularized Policy
               Iteration},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning,
               {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {6032--6042},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/lazic21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/LazicYAS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2021 paper (PMLR volume 139).
@inproceedings{DBLP:conf/icml/MeiGDSS21,
  author    = {Mei, Jincheng and
               Gao, Yue and
               Dai, Bo and
               Szepesv{\'{a}}ri, Csaba and
               Schuurmans, Dale},
  editor    = {Meila, Marina and
               Zhang, Tong},
  title     = {Leveraging Non-uniformity in First-order Non-convex Optimization},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning,
               {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {7555--7564},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/mei21a.html},
  timestamp = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/MeiGDSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2021 paper (PMLR volume 139).
@inproceedings{DBLP:conf/icml/XiaoWMDL0SS21,
  author    = {Xiao, Chenjun and
               Wu, Yifan and
               Mei, Jincheng and
               Dai, Bo and
               Lattimore, Tor and
               Li, Lihong and
               Szepesv{\'{a}}ri, Csaba and
               Schuurmans, Dale},
  editor    = {Meila, Marina and
               Zhang, Tong},
  title     = {On the Optimality of Batch Policy Optimization Algorithms},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning,
               {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {11362--11371},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/xiao21b.html},
  timestamp = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/XiaoWMDL0SS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2021 paper; DBLP:journals/corr/abs-2102-08607 carries the same title and authors.
% {NeurIPS} is braced in booktitle so case-changing styles leave the acronym intact.
@inproceedings{DBLP:conf/nips/ZhangNYSW21,
  author       = {Zhang, Junyu and
                  Ni, Chengzhuo and
                  Yu, Zheng and
                  Szepesv{\'{a}}ri, Csaba and
                  Wang, Mengdi},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {On the Convergence and Sample Efficiency of Variance-Reduced Policy
                  Gradient Method},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {2228--2240},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/11c484ea9305ea4c7bb6b2e6d570d466-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ZhangNYSW21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2021 paper; DBLP:journals/corr/abs-2110-15572 carries the same title and authors.
% {NeurIPS} is braced in booktitle so case-changing styles leave the acronym intact.
@inproceedings{DBLP:conf/nips/MeiDXSS21,
  author       = {Mei, Jincheng and
                  Dai, Bo and
                  Xiao, Chenjun and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {Understanding the Effect of Stochasticity in Policy Optimization},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {19339--19351},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/a12f69495f41bb3b637ba1b6238884d6-Abstract.html},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/MeiDXSS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2021 paper; DBLP:journals/corr/abs-2107-06196 carries the same title and authors.
% {NeurIPS} is braced in booktitle so case-changing styles leave the acronym intact.
@inproceedings{DBLP:conf/nips/BasuKZS21,
  author       = {Basu, Soumya and
                  Kveton, Branislav and
                  Zaheer, Manzil and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {No Regrets for Learning the Prior in Bandits},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {28029--28041},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/ec1f764517b7ffb52057af6df18142b7-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/BasuKZS21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2021 paper; DBLP:journals/corr/abs-2107-12685 carries the same title and authors.
% {NeurIPS} is braced in booktitle so case-changing styles leave the acronym intact.
@inproceedings{DBLP:conf/nips/KuzborskijSRRP21,
  author       = {Kuzborskij, Ilja and
                  Szepesv{\'{a}}ri, Csaba and
                  Rivasplata, Omar and
                  Rannen{-}Triki, Amal and
                  Pascanu, Razvan},
  editor       = {Ranzato, Marc'Aurelio and
                  Beygelzimer, Alina and
                  Dauphin, Yann N. and
                  Liang, Percy and
                  Vaughan, Jennifer Wortman},
  title        = {On the Role of Optimization in Double Descent: {A} Least Squares Study},
  booktitle    = {Advances in Neural Information Processing Systems 34: Annual Conference
                  on Neural Information Processing Systems 2021, {NeurIPS} 2021, December
                  6-14, 2021, virtual},
  pages        = {29567--29577},
  year         = {2021},
  url          = {https://proceedings.neurips.cc/paper/2021/hash/f754186469a933256d7d64095e963594-Abstract.html},
  timestamp    = {Tue, 03 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/KuzborskijSRRP21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2102.02049.
% {MDPs} is braced so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2102-02049,
  author       = {Weisz, Gell{\'{e}}rt and
                  Amortila, Philip and
                  Janzer, Barnab{\'{a}}s and
                  Abbasi{-}Yadkori, Yasin and
                  Jiang, Nan and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {On Query-efficient Planning in {MDPs} under Linear Realizability of
                  the Optimal State-value Function},
  journal      = {CoRR},
  volume       = {abs/2102.02049},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.02049},
  eprinttype   = {arXiv},
  eprint       = {2102.02049},
  timestamp    = {Tue, 19 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-02049.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2102.03607.
@article{DBLP:journals/corr/abs-2102-03607,
  author       = {Hao, Botao and
                  Ji, Xiang and
                  Duan, Yaqi and
                  Lu, Hao and
                  Szepesv{\'{a}}ri, Csaba and
                  Wang, Mengdi},
  title        = {Bootstrapping Statistical Inference for Off-Policy Evaluation},
  journal      = {CoRR},
  volume       = {abs/2102.03607},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.03607},
  eprinttype   = {arXiv},
  eprint       = {2102.03607},
  timestamp    = {Wed, 10 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-03607.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2102.06129.
% {Thompson} is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/corr/abs-2102-06129,
  author       = {Kveton, Branislav and
                  Konobeev, Mikhail and
                  Zaheer, Manzil and
                  Hsu, Chih{-}Wei and
                  Mladenov, Martin and
                  Boutilier, Craig and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Meta-{Thompson} Sampling},
  journal      = {CoRR},
  volume       = {abs/2102.06129},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.06129},
  eprinttype   = {arXiv},
  eprint       = {2102.06129},
  timestamp    = {Thu, 18 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-06129.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2102.06234.
% {KL} is braced so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2102-06234,
  author       = {Lazic, Nevena and
                  Hao, Botao and
                  Abbasi{-}Yadkori, Yasin and
                  Schuurmans, Dale and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Optimization Issues in {KL}-Constrained Approximate Policy Iteration},
  journal      = {CoRR},
  volume       = {abs/2102.06234},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.06234},
  eprinttype   = {arXiv},
  eprint       = {2102.06234},
  timestamp    = {Thu, 18 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-06234.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2102.08607; same title and authors as DBLP:conf/nips/ZhangNYSW21.
@article{DBLP:journals/corr/abs-2102-08607,
  author       = {Zhang, Junyu and
                  Ni, Chengzhuo and
                  Yu, Zheng and
                  Szepesv{\'{a}}ri, Csaba and
                  Wang, Mengdi},
  title        = {On the Convergence and Sample Efficiency of Variance-Reduced Policy
                  Gradient Method},
  journal      = {CoRR},
  volume       = {abs/2102.08607},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.08607},
  eprinttype   = {arXiv},
  eprint       = {2102.08607},
  timestamp    = {Fri, 19 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-08607.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2102.12611.
@article{DBLP:journals/corr/abs-2102-12611,
  author       = {Lazic, Nevena and
                  Yin, Dong and
                  Abbasi{-}Yadkori, Yasin and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Improved Regret Bound and Experience Replay in Regularized Policy
                  Iteration},
  journal      = {CoRR},
  volume       = {abs/2102.12611},
  year         = {2021},
  url          = {https://arxiv.org/abs/2102.12611},
  eprinttype   = {arXiv},
  eprint       = {2102.12611},
  timestamp    = {Tue, 02 Mar 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2102-12611.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2104.02293; same title as DBLP:conf/icml/XiaoWMDL0SS21 (author order differs).
@article{DBLP:journals/corr/abs-2104-02293,
  author       = {Xiao, Chenjun and
                  Wu, Yifan and
                  Lattimore, Tor and
                  Dai, Bo and
                  Mei, Jincheng and
                  Li, Lihong and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  title        = {On the Optimality of Batch Policy Optimization Algorithms},
  journal      = {CoRR},
  volume       = {abs/2104.02293},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.02293},
  eprinttype   = {arXiv},
  eprint       = {2104.02293},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-02293.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2105.06072; same title and authors as DBLP:conf/icml/MeiGDSS21.
@article{DBLP:journals/corr/abs-2105-06072,
  author       = {Mei, Jincheng and
                  Gao, Yue and
                  Dai, Bo and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  title        = {Leveraging Non-uniformity in First-order Non-convex Optimization},
  journal      = {CoRR},
  volume       = {abs/2105.06072},
  year         = {2021},
  url          = {https://arxiv.org/abs/2105.06072},
  eprinttype   = {arXiv},
  eprint       = {2105.06072},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2105-06072.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2106.08199.
@article{DBLP:journals/corr/abs-2106-08199,
  author       = {Abdolmaleki, Abbas and
                  Huang, Sandy H. and
                  Vezzani, Giulia and
                  Shahriari, Bobak and
                  Springenberg, Jost Tobias and
                  Mishra, Shruti and
                  TB, Dhruva and
                  Byravan, Arunkumar and
                  Bousmalis, Konstantinos and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba and
                  Hadsell, Raia and
                  Heess, Nicolas and
                  Riedmiller, Martin A.},
  title        = {On Multi-objective Policy Optimization as a Tool for Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2106.08199},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.08199},
  eprinttype   = {arXiv},
  eprint       = {2106.08199},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-08199.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2106.09973.
@article{DBLP:journals/corr/abs-2106-09973,
  author       = {Xiao, Chenjun and
                  Lee, Ilbin and
                  Dai, Bo and
                  Schuurmans, Dale and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {On the Sample Complexity of Batch Reinforcement Learning with Policy-Induced
                  Data},
  journal      = {CoRR},
  volume       = {abs/2106.09973},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.09973},
  eprinttype   = {arXiv},
  eprint       = {2106.09973},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-09973.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2107.06196; same title and authors as DBLP:conf/nips/BasuKZS21.
@article{DBLP:journals/corr/abs-2107-06196,
  author       = {Basu, Soumya and
                  Kveton, Branislav and
                  Zaheer, Manzil and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {No Regrets for Learning the Prior in Bandits},
  journal      = {CoRR},
  volume       = {abs/2107.06196},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.06196},
  eprinttype   = {arXiv},
  eprint       = {2107.06196},
  timestamp    = {Fri, 23 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-06196.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2107.12685; same title and authors as DBLP:conf/nips/KuzborskijSRRP21.
@article{DBLP:journals/corr/abs-2107-12685,
  author       = {Kuzborskij, Ilja and
                  Szepesv{\'{a}}ri, Csaba and
                  Rivasplata, Omar and
                  Rannen{-}Triki, Amal and
                  Pascanu, Razvan},
  title        = {On the Role of Optimization in Double Descent: {A} Least Squares Study},
  journal      = {CoRR},
  volume       = {abs/2107.12685},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.12685},
  eprinttype   = {arXiv},
  eprint       = {2107.12685},
  timestamp    = {Mon, 02 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-12685.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2108.05533.
@article{DBLP:journals/corr/abs-2108-05533,
  author       = {Yin, Dong and
                  Hao, Botao and
                  Abbasi{-}Yadkori, Yasin and
                  Lazic, Nevena and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Efficient Local Planning with Linear Function Approximation},
  journal      = {CoRR},
  volume       = {abs/2108.05533},
  year         = {2021},
  url          = {https://arxiv.org/abs/2108.05533},
  eprinttype   = {arXiv},
  eprint       = {2108.05533},
  timestamp    = {Wed, 18 Aug 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2108-05533.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2110.02195.
% {TensorPlan} and {MDPs} are braced so sentence-casing styles keep their capitalisation.
@article{DBLP:journals/corr/abs-2110-02195,
  author       = {Weisz, Gell{\'{e}}rt and
                  Szepesv{\'{a}}ri, Csaba and
                  Gy{\"{o}}rgy, Andr{\'{a}}s},
  title        = {{TensorPlan} and the Few Actions Lower Bound for Planning in {MDPs} under
                  Linear Realizability of Optimal Value Functions},
  journal      = {CoRR},
  volume       = {abs/2110.02195},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.02195},
  eprinttype   = {arXiv},
  eprint       = {2110.02195},
  timestamp    = {Mon, 18 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-02195.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2110.08984.
% {MDPs} is braced so sentence-casing styles do not lowercase the acronym.
@article{DBLP:journals/corr/abs-2110-08984,
  author       = {Zhong, Han and
                  Yang, Zhuoran and
                  Wang, Zhaoran and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Optimistic Policy Optimization is Provably Efficient in Non-stationary
                  {MDPs}},
  journal      = {CoRR},
  volume       = {abs/2110.08984},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.08984},
  eprinttype   = {arXiv},
  eprint       = {2110.08984},
  timestamp    = {Wed, 27 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-08984.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2110.15572; same title and authors as DBLP:conf/nips/MeiDXSS21.
@article{DBLP:journals/corr/abs-2110-15572,
  author       = {Mei, Jincheng and
                  Dai, Bo and
                  Xiao, Chenjun and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  title        = {Understanding the Effect of Stochasticity in Policy Optimization},
  journal      = {CoRR},
  volume       = {abs/2110.15572},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.15572},
  eprinttype   = {arXiv},
  eprint       = {2110.15572},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-15572.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2111.11485.
@article{DBLP:journals/corr/abs-2111-11485,
  author       = {Ren, Tongzheng and
                  Zhang, Tianjun and
                  Szepesv{\'{a}}ri, Csaba and
                  Dai, Bo},
  title        = {A Free Lunch from the Noise: Provable and Practical Exploration for
                  Representation Learning},
  journal      = {CoRR},
  volume       = {abs/2111.11485},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.11485},
  eprinttype   = {arXiv},
  eprint       = {2111.11485},
  timestamp    = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-11485.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Auton. Agents Multi Agent Syst. 34(1), 2020; the pages field holds a single number, 7.
@article{DBLP:journals/aamas/TuylsPLHELSG20,
  author       = {Tuyls, Karl and
                  P{\'{e}}rolat, Julien and
                  Lanctot, Marc and
                  Hughes, Edward and
                  Everett, Richard and
                  Leibo, Joel Z. and
                  Szepesv{\'{a}}ri, Csaba and
                  Graepel, Thore},
  title        = {Bounds and dynamics for empirical game theoretic analysis},
  journal      = {Auton. Agents Multi Agent Syst.},
  volume       = {34},
  number       = {1},
  pages        = {7},
  year         = {2020},
  url          = {https://doi.org/10.1007/s10458-019-09432-y},
  doi          = {10.1007/S10458-019-09432-Y},
  timestamp    = {Fri, 09 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/aamas/TuylsPLHELSG20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, J. Mach. Learn. Res. vol. 21 (2020), paper 133.
@article{DBLP:journals/jmlr/MaOSS20,
  author       = {Ma, Yao and
                  Olshevsky, Alex and
                  Szepesv{\'{a}}ri, Csaba and
                  Saligrama, Venkatesh},
  title        = {Gradient Descent for Sparse Rank-One Matrix Completion for Crowd-Sourced
                  Aggregation of Sparsely Interacting Workers},
  journal      = {J. Mach. Learn. Res.},
  volume       = {21},
  pages        = {133:1--133:36},
  year         = {2020},
  url          = {http://jmlr.org/papers/v21/19-359.html},
  timestamp    = {Wed, 18 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jmlr/MaOSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Theor. Comput. Sci. vol. 808 (2020).
@article{DBLP:journals/tcs/JoulaniGS20,
  author       = {Joulani, Pooria and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {A modular analysis of adaptive (non-)convex optimization: Optimism,
                  composite objectives, variance reduction, and variational bounds},
  journal      = {Theor. Comput. Sci.},
  volume       = {808},
  pages        = {108--138},
  year         = {2020},
  url          = {https://doi.org/10.1016/j.tcs.2019.11.015},
  doi          = {10.1016/J.TCS.2019.11.015},
  timestamp    = {Tue, 19 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tcs/JoulaniGS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2020 paper (PMLR vol. 108).
@inproceedings{DBLP:conf/aistats/KvetonZS0GB20,
  author       = {Kveton, Branislav and
                  Zaheer, Manzil and
                  Szepesv{\'{a}}ri, Csaba and
                  Li, Lihong and
                  Ghavamzadeh, Mohammad and
                  Boutilier, Craig},
  editor       = {Chiappa, Silvia and
                  Calandra, Roberto},
  title        = {Randomized Exploration in Generalized Linear Bandits},
  booktitle    = {The 23rd International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  series       = {Proceedings of Machine Learning Research},
  volume       = {108},
  pages        = {2066--2076},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v108/kveton20a.html},
  timestamp    = {Mon, 29 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/KvetonZS0GB20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2020 paper (PMLR vol. 108).
@inproceedings{DBLP:conf/aistats/HaoLS20,
  author       = {Hao, Botao and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Chiappa, Silvia and
                  Calandra, Roberto},
  title        = {Adaptive Exploration in Linear Contextual Bandit},
  booktitle    = {The 23rd International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  series       = {Proceedings of Machine Learning Research},
  volume       = {108},
  pages        = {3536--3545},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v108/hao20b.html},
  timestamp    = {Mon, 29 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/HaoLS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% COLT 2020 paper (PMLR vol. 125).
@inproceedings{DBLP:conf/colt/LattimoreS20,
  author       = {Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Abernethy, Jacob D. and
                  Agarwal, Shivani},
  title        = {Exploration by Optimisation in Partial Monitoring},
  booktitle    = {Conference on Learning Theory, {COLT} 2020, 9-12 July 2020, Virtual
                  Event [Graz, Austria]},
  series       = {Proceedings of Machine Learning Research},
  volume       = {125},
  pages        = {2488--2515},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v125/lattimore20a.html},
  timestamp    = {Fri, 27 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/colt/LattimoreS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2020 paper (OpenReview).
% Names use the comma form: written first-name-first, "Benjamin Van Roy" is parsed with
% only "Roy" as the surname, whereas "Van Roy, Benjamin" keeps the two-word surname together.
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author       = {Osband, Ian and
                  Doron, Yotam and
                  Hessel, Matteo and
                  Aslanides, John and
                  Sezener, Eren and
                  Saraiva, Andre and
                  McKinney, Katrina and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba and
                  Singh, Satinder and
                  Van Roy, Benjamin and
                  Sutton, Richard S. and
                  Silver, David and
                  van Hasselt, Hado},
  title        = {Behaviour Suite for Reinforcement Learning},
  booktitle    = {8th International Conference on Learning Representations, {ICLR} 2020,
                  Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher    = {OpenReview.net},
  year         = {2020},
  url          = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2020 paper (PMLR vol. 119); DBLP:conf/l4dc/JiaYSW20 has the same title but a different author list.
@inproceedings{DBLP:conf/icml/AyoubJSWY20,
  author       = {Ayoub, Alex and
                  Jia, Zeyu and
                  Szepesv{\'{a}}ri, Csaba and
                  Wang, Mengdi and
                  Yang, Lin},
  title        = {Model-Based Reinforcement Learning with Value-Targeted Regression},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {463--474},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/ayoub20a.html},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/AyoubJSWY20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2020 paper (PMLR vol. 119).
@inproceedings{DBLP:conf/icml/JoulaniRGS20,
  author       = {Joulani, Pooria and
                  Raj, Anant and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {A simpler approach to accelerated optimization: iterative averaging
                  meets optimism},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {4984--4993},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/joulani20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/JoulaniRGS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2020 paper (PMLR vol. 119).
@inproceedings{DBLP:conf/icml/LattimoreSW20,
  author       = {Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba and
                  Weisz, Gell{\'{e}}rt},
  title        = {Learning with Good Feature Representations in Bandits and in {RL}
                  with a Generative Model},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {5662--5670},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/lattimore20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/LattimoreSW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2020 paper (PMLR vol. 119).
@inproceedings{DBLP:conf/icml/MeiXSS20,
  author       = {Mei, Jincheng and
                  Xiao, Chenjun and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  title        = {On the Global Convergence Rates of Softmax Policy Gradient Methods},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {6820--6829},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/mei20b.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/MeiXSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% L4DC 2020 paper (PMLR vol. 120); DBLP:conf/icml/AyoubJSWY20 has the same title but a different author list.
@inproceedings{DBLP:conf/l4dc/JiaYSW20,
  author       = {Jia, Zeyu and
                  Yang, Lin and
                  Szepesv{\'{a}}ri, Csaba and
                  Wang, Mengdi},
  editor       = {Bayen, Alexandre M. and
                  Jadbabaie, Ali and
                  Pappas, George J. and
                  Parrilo, Pablo A. and
                  Recht, Benjamin and
                  Tomlin, Claire J. and
                  Zeilinger, Melanie N.},
  title        = {Model-Based Reinforcement Learning with Value-Targeted Regression},
  booktitle    = {Proceedings of the 2nd Annual Conference on Learning for Dynamics
                  and Control, {L4DC} 2020, Online Event, Berkeley, CA, USA, 11-12 June
                  2020},
  series       = {Proceedings of Machine Learning Research},
  volume       = {120},
  pages        = {666--686},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v120/jia20a.html},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/l4dc/JiaYSW20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2020 paper; this record has no pages field.
% {NeurIPS} is braced in booktitle so case-changing styles leave the acronym intact.
@inproceedings{DBLP:conf/nips/BoutilierHKMSZ20,
  author       = {Boutilier, Craig and
                  Hsu, Chih{-}Wei and
                  Kveton, Branislav and
                  Mladenov, Martin and
                  Szepesv{\'{a}}ri, Csaba and
                  Zaheer, Manzil},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {Differentiable Meta-Learning of Bandit Policies},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/171ae1bbb81475eb96287dd78565b38b-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/BoutilierHKMSZ20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2020 paper; this record has no pages field.
% {CoinDICE} and {NeurIPS} are braced so case-changing styles keep their mixed-case spelling.
@inproceedings{DBLP:conf/nips/DaiNC0SS20,
  author       = {Dai, Bo and
                  Nachum, Ofir and
                  Chow, Yinlam and
                  Li, Lihong and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {{CoinDICE}: Off-Policy Confidence Interval Estimation},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/6aaba9a124857622930ca4e50f5afed2-Abstract.html},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/DaiNC0SS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2020 paper; this record has no pages field.
% {NeurIPS} is braced in booktitle so case-changing styles leave the acronym intact.
@inproceedings{DBLP:conf/nips/MeiXD0SS20,
  author       = {Mei, Jincheng and
                  Xiao, Chenjun and
                  Dai, Bo and
                  Li, Lihong and
                  Szepesv{\'{a}}ri, Csaba and
                  Schuurmans, Dale},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {Escaping the Gravitational Pull of Softmax},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/f1cf2a082126bf02de0b307778ce73a7-Abstract.html},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/MeiXD0SS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2020 paper; this record has no pages field.
% {NeurIPS} is braced in booktitle so case-changing styles leave the acronym intact.
@inproceedings{DBLP:conf/nips/PacchianoPA0ZLS20,
  author       = {Pacchiano, Aldo and
                  Phan, My and
                  Abbasi{-}Yadkori, Yasin and
                  Rao, Anup and
                  Zimmert, Julian and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Larochelle, Hugo and
                  Ranzato, Marc'Aurelio and
                  Hadsell, Raia and
                  Balcan, Maria{-}Florina and
                  Lin, Hsuan{-}Tien},
  title        = {Model Selection in Contextual Stochastic Bandit Problems},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, {NeurIPS} 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/751d51528afe5e6f7fe95dece4ed32ba-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/PacchianoPA0ZLS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The braces around PAC-Bayes in the title keep its capitalization under sentence-casing styles.
@inproceedings{DBLP:conf/nips/RivasplataKSS20,
  author       = {Omar Rivasplata and
                  Ilja Kuzborskij and
                  Csaba Szepesv{\'{a}}ri and
                  John Shawe{-}Taylor},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {{PAC-Bayes} Analysis Beyond the Usual Bounds},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/c3992e9a68c5ae12bd18488bc579b30d-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/RivasplataKSS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The braces around MDPs in the title stop sentence-casing styles from lowercasing the acronym.
@inproceedings{DBLP:conf/nips/ShariffS20,
  author       = {Roshan Shariff and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {Efficient Planning in Large {MDPs} with Weak Linear Function Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/de07edeeba9f475c9395959494cd8f64-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ShariffS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VermaHSS20,
  author = {Verma, Arun and
            Hanawal, Manjesh Kumar and
            Szepesv{\'{a}}ri, Csaba and
            Saligrama, Venkatesh},
  editor = {Larochelle, Hugo and
            Ranzato, Marc'Aurelio and
            Hadsell, Raia and
            Balcan, Maria{-}Florina and
            Lin, Hsuan{-}Tien},
  title = {Online Algorithm for Unsupervised Sequential Selection
           with Contextual Information},
  booktitle = {Advances in Neural Information Processing Systems 33:
               Annual Conference on Neural Information Processing Systems 2020,
               NeurIPS 2020, December 6-12, 2020, virtual},
  year = {2020},
  url = {https://proceedings.neurips.cc/paper/2020/hash/08e5d8066881eab185d0de9db3b36c7f-Abstract.html},
  timestamp = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/nips/VermaHSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around ImpatientCapsAndRuns preserve the camel-case algorithm name under sentence-casing styles.
@inproceedings{DBLP:conf/nips/Weisz0LGLSL20,
  author       = {Gell{\'{e}}rt Weisz and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Wei{-}I Lin and
                  Devon R. Graham and
                  Kevin Leyton{-}Brown and
                  Csaba Szepesv{\'{a}}ri and
                  Brendan Lucier},
  editor       = {Hugo Larochelle and
                  Marc'Aurelio Ranzato and
                  Raia Hadsell and
                  Maria{-}Florina Balcan and
                  Hsuan{-}Tien Lin},
  title        = {{ImpatientCapsAndRuns}: Approximately Optimal Algorithm Configuration
                  from an Infinite Pool},
  booktitle    = {Advances in Neural Information Processing Systems 33: Annual Conference
                  on Neural Information Processing Systems 2020, NeurIPS 2020, December
                  6-12, 2020, virtual},
  year         = {2020},
  url          = {https://proceedings.neurips.cc/paper/2020/hash/ca5520b5672ea120b23bde75c46e76c6-Abstract.html},
  timestamp    = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Weisz0LGLSL20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZhangKBSW20,
  author = {Zhang, Junyu and
            Koppel, Alec and
            Bedi, Amrit Singh and
            Szepesv{\'{a}}ri, Csaba and
            Wang, Mengdi},
  editor = {Larochelle, Hugo and
            Ranzato, Marc'Aurelio and
            Hadsell, Raia and
            Balcan, Maria{-}Florina and
            Lin, Hsuan{-}Tien},
  title = {Variational Policy Gradient Method for Reinforcement Learning
           with General Utilities},
  booktitle = {Advances in Neural Information Processing Systems 33:
               Annual Conference on Neural Information Processing Systems 2020,
               NeurIPS 2020, December 6-12, 2020, virtual},
  year = {2020},
  url = {https://proceedings.neurips.cc/paper/2020/hash/30ee748d38e21392de740e2f9dc686b6-Abstract.html},
  timestamp = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/nips/ZhangKBSW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-03069,
  author = {Hao, Botao and
            Lazic, Nevena and
            Abbasi{-}Yadkori, Yasin and
            Joulani, Pooria and
            Szepesv{\'{a}}ri, Csaba},
  title = {Provably Efficient Adaptive Approximate Policy Iteration},
  journal = {CoRR},
  volume = {abs/2002.03069},
  year = {2020},
  url = {https://arxiv.org/abs/2002.03069},
  eprinttype = {arXiv},
  eprint = {2002.03069},
  timestamp = {Wed, 12 Feb 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2002-03069.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-06772,
  author = {Boutilier, Craig and
            Hsu, Chih{-}Wei and
            Kveton, Branislav and
            Mladenov, Martin and
            Szepesv{\'{a}}ri, Csaba and
            Zaheer, Manzil},
  title = {Differentiable Bandit Exploration},
  journal = {CoRR},
  volume = {abs/2002.06772},
  year = {2020},
  url = {https://arxiv.org/abs/2002.06772},
  eprinttype = {arXiv},
  eprint = {2002.06772},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2002-06772.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2003-01704,
  author = {Pacchiano, Aldo and
            Phan, My and
            Abbasi{-}Yadkori, Yasin and
            Rao, Anup and
            Zimmert, Julian and
            Lattimore, Tor and
            Szepesv{\'{a}}ri, Csaba},
  title = {Model Selection in Contextual Stochastic Bandit Problems},
  journal = {CoRR},
  volume = {abs/2003.01704},
  year = {2020},
  url = {https://arxiv.org/abs/2003.01704},
  eprinttype = {arXiv},
  eprint = {2003.01704},
  timestamp = {Thu, 03 Sep 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2003-01704.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2005-06392,
  author = {Mei, Jincheng and
            Xiao, Chenjun and
            Szepesv{\'{a}}ri, Csaba and
            Schuurmans, Dale},
  title = {On the Global Convergence Rates of Softmax Policy Gradient Methods},
  journal = {CoRR},
  volume = {abs/2005.06392},
  year = {2020},
  url = {https://arxiv.org/abs/2005.06392},
  eprinttype = {arXiv},
  eprint = {2005.06392},
  timestamp = {Thu, 14 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2005-06392.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-01107,
  author = {Ayoub, Alex and
            Jia, Zeyu and
            Szepesv{\'{a}}ri, Csaba and
            Wang, Mengdi and
            Yang, Lin F.},
  title = {Model-Based Reinforcement Learning with Value-Targeted Regression},
  journal = {CoRR},
  volume = {abs/2006.01107},
  year = {2020},
  url = {https://arxiv.org/abs/2006.01107},
  eprinttype = {arXiv},
  eprint = {2006.01107},
  timestamp = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2006-01107.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-05094,
  author = {Kveton, Branislav and
            Mladenov, Martin and
            Hsu, Chih{-}Wei and
            Zaheer, Manzil and
            Szepesv{\'{a}}ri, Csaba and
            Boutilier, Craig},
  title = {Differentiable Meta-Learning in Contextual Bandits},
  journal = {CoRR},
  volume = {abs/2006.05094},
  year = {2020},
  url = {https://arxiv.org/abs/2006.05094},
  eprinttype = {arXiv},
  eprint = {2006.05094},
  timestamp = {Fri, 12 Jun 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2006-05094.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-10460,
  author = {Kuzborskij, Ilja and
            Vernade, Claire and
            Gy{\"{o}}rgy, Andr{\'{a}}s and
            Szepesv{\'{a}}ri, Csaba},
  title = {Confident Off-Policy Evaluation and Selection
           through Self-Normalized Importance Weighting},
  journal = {CoRR},
  volume = {abs/2006.10460},
  year = {2020},
  url = {https://arxiv.org/abs/2006.10460},
  eprinttype = {arXiv},
  eprint = {2006.10460},
  timestamp = {Tue, 23 Jun 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2006-10460.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around PAC-Bayes in the title keep its capitalization under sentence-casing styles.
@article{DBLP:journals/corr/abs-2006-13057,
  author       = {Omar Rivasplata and
                  Ilja Kuzborskij and
                  Csaba Szepesv{\'{a}}ri and
                  John Shawe{-}Taylor},
  title        = {{PAC-Bayes} Analysis Beyond the Usual Bounds},
  journal      = {CoRR},
  volume       = {abs/2006.13057},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.13057},
  eprinttype    = {arXiv},
  eprint       = {2006.13057},
  timestamp    = {Thu, 02 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-13057.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-02151,
  author = {Zhang, Junyu and
            Koppel, Alec and
            Bedi, Amrit Singh and
            Szepesv{\'{a}}ri, Csaba and
            Wang, Mengdi},
  title = {Variational Policy Gradient Method for Reinforcement Learning
           with General Utilities},
  journal = {CoRR},
  volume = {abs/2007.02151},
  year = {2020},
  url = {https://arxiv.org/abs/2007.02151},
  eprinttype = {arXiv},
  eprint = {2007.02151},
  timestamp = {Fri, 17 Jul 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2007-02151.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around MDPs in the title stop sentence-casing styles from lowercasing the acronym.
@article{DBLP:journals/corr/abs-2007-06184,
  author       = {Roshan Shariff and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Efficient Planning in Large {MDPs} with Weak Linear Function Approximation},
  journal      = {CoRR},
  volume       = {abs/2007.06184},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.06184},
  eprinttype    = {arXiv},
  eprint       = {2007.06184},
  timestamp    = {Tue, 21 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-06184.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-12911,
  author = {P{\'{e}}rez{-}Ortiz, Mar{\'{\i}}a and
            Rivasplata, Omar and
            Shawe{-}Taylor, John and
            Szepesv{\'{a}}ri, Csaba},
  title = {Tighter risk certificates for neural networks},
  journal = {CoRR},
  volume = {abs/2007.12911},
  year = {2020},
  url = {https://arxiv.org/abs/2007.12911},
  eprinttype = {arXiv},
  eprint = {2007.12911},
  timestamp = {Mon, 26 Apr 2021 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2007-12911.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around MDPs in the title stop sentence-casing styles from lowercasing the acronym.
@article{DBLP:journals/corr/abs-2010-01374,
  author       = {Gell{\'{e}}rt Weisz and
                  Philip Amortila and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Exponential Lower Bounds for Planning in {MDPs} With Linearly-Realizable
                  Optimal Action-Value Functions},
  journal      = {CoRR},
  volume       = {abs/2010.01374},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.01374},
  eprinttype    = {arXiv},
  eprint       = {2010.01374},
  timestamp    = {Mon, 12 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-01374.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The braces around CoinDICE preserve the mixed-case method name under sentence-casing styles.
@article{DBLP:journals/corr/abs-2010-11652,
  author       = {Bo Dai and
                  Ofir Nachum and
                  Yinlam Chow and
                  Lihong Li and
                  Csaba Szepesv{\'{a}}ri and
                  Dale Schuurmans},
  title        = {{CoinDICE}: Off-Policy Confidence Interval Estimation},
  journal      = {CoRR},
  volume       = {abs/2010.11652},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.11652},
  eprinttype    = {arXiv},
  eprint       = {2010.11652},
  timestamp    = {Thu, 29 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-11652.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-12353,
  author = {Verma, Arun and
            Hanawal, Manjesh Kumar and
            Szepesv{\'{a}}ri, Csaba and
            Saligrama, Venkatesh},
  title = {Online Algorithm for Unsupervised Sequential Selection
           with Contextual Information},
  journal = {CoRR},
  volume = {abs/2010.12353},
  year = {2020},
  url = {https://arxiv.org/abs/2010.12353},
  eprinttype = {arXiv},
  eprint = {2010.12353},
  timestamp = {Tue, 27 Oct 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2010-12353.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-00344,
  author = {Konobeev, Mikhail and
            Kuzborskij, Ilja and
            Szepesv{\'{a}}ri, Csaba},
  title = {On Optimality of Meta-Learning in Fixed-Design Regression
           with Weighted Biased Regularization},
  journal = {CoRR},
  volume = {abs/2011.00344},
  year = {2020},
  url = {https://arxiv.org/abs/2011.00344},
  eprinttype = {arXiv},
  eprint = {2011.00344},
  timestamp = {Mon, 09 Nov 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2011-00344.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-04018,
  author = {Hao, Botao and
            Lattimore, Tor and
            Szepesv{\'{a}}ri, Csaba and
            Wang, Mengdi},
  title = {Online Sparse Reinforcement Learning},
  journal = {CoRR},
  volume = {abs/2011.04018},
  year = {2020},
  url = {https://arxiv.org/abs/2011.04018},
  eprinttype = {arXiv},
  eprint = {2011.04018},
  timestamp = {Thu, 12 Nov 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2011-04018.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-04019,
  author = {Hao, Botao and
            Duan, Yaqi and
            Lattimore, Tor and
            Szepesv{\'{a}}ri, Csaba and
            Wang, Mengdi},
  title = {Sparse Feature Selection Makes Batch Reinforcement Learning
           More Sample Efficient},
  journal = {CoRR},
  volume = {abs/2011.04019},
  year = {2020},
  url = {https://arxiv.org/abs/2011.04019},
  eprinttype = {arXiv},
  eprint = {2011.04019},
  timestamp = {Thu, 12 Nov 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2011-04019.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-05944,
  author = {Kirschner, Johannes and
            Lattimore, Tor and
            Vernade, Claire and
            Szepesv{\'{a}}ri, Csaba},
  title = {Asymptotically Optimal Information-Directed Sampling},
  journal = {CoRR},
  volume = {abs/2011.05944},
  year = {2020},
  url = {https://arxiv.org/abs/2011.05944},
  eprinttype = {arXiv},
  eprint = {2011.05944},
  timestamp = {Thu, 12 Nov 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2011-05944.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around Markov in the title keep the proper noun capitalized under sentence-casing styles.
@article{DBLP:journals/corr/abs-2012-08507,
  author       = {Dongruo Zhou and
                  Quanquan Gu and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Nearly Minimax Optimal Reinforcement Learning for Linear Mixture {Markov}
                  Decision Processes},
  journal      = {CoRR},
  volume       = {abs/2012.08507},
  year         = {2020},
  url          = {https://arxiv.org/abs/2012.08507},
  eprinttype    = {arXiv},
  eprint       = {2012.08507},
  timestamp    = {Sat, 02 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2012-08507.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/Abou-MoustafaS19,
  author = {Abou{-}Moustafa, Karim T. and
            Szepesv{\'{a}}ri, Csaba},
  title = {An Exponential Tail Bound for the Deleted Estimate},
  booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence,
               {AAAI} 2019, The Thirty-First Innovative Applications of
               Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI}
               Symposium on Educational Advances in Artificial Intelligence,
               {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages = {3143--3150},
  publisher = {{AAAI} Press},
  year = {2019},
  url = {https://doi.org/10.1609/aaai.v33i01.33013143},
  doi = {10.1609/AAAI.V33I01.33013143},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl = {https://dblp.org/rec/conf/aaai/Abou-MoustafaS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/Abbasi-YadkoriL19,
  author = {Abbasi{-}Yadkori, Yasin and
            Lazic, Nevena and
            Szepesv{\'{a}}ri, Csaba},
  editor = {Chaudhuri, Kamalika and
            Sugiyama, Masashi},
  title = {Model-Free Linear Quadratic Control via Reduction to Expert Prediction},
  booktitle = {The 22nd International Conference on Artificial Intelligence
               and Statistics, {AISTATS} 2019, 16-18 April 2019, Naha,
               Okinawa, Japan},
  series = {Proceedings of Machine Learning Research},
  volume = {89},
  pages = {3108--3117},
  publisher = {{PMLR}},
  year = {2019},
  url = {http://proceedings.mlr.press/v89/abbasi-yadkori19a.html},
  timestamp = {Fri, 07 Jun 2019 09:03:47 +0200},
  biburl = {https://dblp.org/rec/conf/aistats/Abbasi-YadkoriL19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/VermaHSS19,
  author = {Verma, Arun and
            Hanawal, Manjesh Kumar and
            Szepesv{\'{a}}ri, Csaba and
            Saligrama, Venkatesh},
  editor = {Chaudhuri, Kamalika and
            Sugiyama, Masashi},
  title = {Online Algorithm for Unsupervised Sensor Selection},
  booktitle = {The 22nd International Conference on Artificial Intelligence
               and Statistics, {AISTATS} 2019, 16-18 April 2019, Naha,
               Okinawa, Japan},
  series = {Proceedings of Machine Learning Research},
  volume = {89},
  pages = {3168--3176},
  publisher = {{PMLR}},
  year = {2019},
  url = {http://proceedings.mlr.press/v89/verma19a.html},
  timestamp = {Fri, 09 Oct 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/aistats/VermaHSS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around Efron-Stein in the title keep the proper names capitalized under sentence-casing styles.
@inproceedings{DBLP:conf/alt/Abou-MoustafaS19,
  author       = {Karim T. Abou{-}Moustafa and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Aur{\'{e}}lien Garivier and
                  Satyen Kale},
  title        = {An Exponential {Efron-Stein} Inequality for \emph{L\({}_{\mbox{q}}\)}
                  Stable Learning Rules},
  booktitle    = {Algorithmic Learning Theory, {ALT} 2019, 22-24 March 2019, Chicago,
                  Illinois, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {98},
  pages        = {31--63},
  publisher    = {{PMLR}},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v98/abou-moustafa19a.html},
  timestamp    = {Tue, 11 Apr 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/alt/Abou-MoustafaS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/alt/LattimoreS19,
  author = {Lattimore, Tor and
            Szepesv{\'{a}}ri, Csaba},
  editor = {Garivier, Aur{\'{e}}lien and
            Kale, Satyen},
  title = {Cleaning up the neighborhood: {A} full classification
           for adversarial partial monitoring},
  booktitle = {Algorithmic Learning Theory, {ALT} 2019, 22-24 March 2019,
               Chicago, Illinois, {USA}},
  series = {Proceedings of Machine Learning Research},
  volume = {98},
  pages = {529--556},
  publisher = {{PMLR}},
  year = {2019},
  url = {http://proceedings.mlr.press/v98/lattimore19a.html},
  timestamp = {Wed, 29 May 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/alt/LattimoreS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around Gibbs-ERM in the title keep the proper name and acronym capitalized under sentence-casing styles.
@inproceedings{DBLP:conf/colt/KuzborskijCS19,
  author       = {Ilja Kuzborskij and
                  Nicol{\`{o}} Cesa{-}Bianchi and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Alina Beygelzimer and
                  Daniel Hsu},
  title        = {Distribution-Dependent Analysis of {Gibbs-ERM} Principle},
  booktitle    = {Conference on Learning Theory, {COLT} 2019, 25-28 June 2019, Phoenix,
                  AZ, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {99},
  pages        = {2028--2054},
  publisher    = {{PMLR}},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v99/kuzborskij19a.html},
  timestamp    = {Mon, 08 Jul 2019 16:13:41 +0200},
  biburl       = {https://dblp.org/rec/conf/colt/KuzborskijCS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/colt/LattimoreS19,
  author = {Lattimore, Tor and
            Szepesv{\'{a}}ri, Csaba},
  editor = {Beygelzimer, Alina and
            Hsu, Daniel},
  title = {An Information-Theoretic Approach to Minimax Regret in Partial Monitoring},
  booktitle = {Conference on Learning Theory, {COLT} 2019, 25-28 June 2019,
               Phoenix, AZ, {USA}},
  series = {Proceedings of Machine Learning Research},
  volume = {99},
  pages = {2111--2139},
  publisher = {{PMLR}},
  year = {2019},
  url = {http://proceedings.mlr.press/v99/lattimore19a.html},
  timestamp = {Mon, 08 Jul 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/colt/LattimoreS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/UesatoKSERADHK19,
  author = {Uesato, Jonathan and
            Kumar, Ananya and
            Szepesv{\'{a}}ri, Csaba and
            Erez, Tom and
            Ruderman, Avraham and
            Anderson, Keith and
            Dvijotham, Krishnamurthy (Dj) and
            Heess, Nicolas and
            Kohli, Pushmeet},
  title = {Rigorous Agent Evaluation: An Adversarial Approach
           to Uncover Catastrophic Failures},
  booktitle = {7th International Conference on Learning Representations,
               {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year = {2019},
  url = {https://openreview.net/forum?id=B1xhQhRcK7},
  timestamp = {Thu, 25 Jul 2019 13:03:15 +0200},
  biburl = {https://dblp.org/rec/conf/iclr/UesatoKSERADHK19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/KvetonSVWLG19,
  author = {Kveton, Branislav and
            Szepesv{\'{a}}ri, Csaba and
            Vaswani, Sharan and
            Wen, Zheng and
            Lattimore, Tor and
            Ghavamzadeh, Mohammad},
  editor = {Chaudhuri, Kamalika and
            Salakhutdinov, Ruslan},
  title = {Garbage In, Reward Out: Bootstrapping Exploration in Multi-Armed Bandits},
  booktitle = {Proceedings of the 36th International Conference on
               Machine Learning, {ICML} 2019, 9-15 June 2019, Long Beach,
               California, {USA}},
  series = {Proceedings of Machine Learning Research},
  volume = {97},
  pages = {3601--3610},
  publisher = {{PMLR}},
  year = {2019},
  url = {http://proceedings.mlr.press/v97/kveton19a.html},
  timestamp = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl = {https://dblp.org/rec/conf/icml/KvetonSVWLG19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NOTE(review): this entry had no author field, which inproceedings requires.
% The author list below is restored from the published POLITEX paper; confirm names and order
% against the PMLR v97 page given in the url field. The citation key is left unchanged.
@inproceedings{DBLP:conf/icml/X19,
  author       = {Yasin Abbasi{-}Yadkori and
                  Peter L. Bartlett and
                  Kush Bhatia and
                  Nevena Lazic and
                  Csaba Szepesv{\'{a}}ri and
                  Gell{\'{e}}rt Weisz},
  editor       = {Kamalika Chaudhuri and
                  Ruslan Salakhutdinov},
  title        = {{POLITEX:} Regret Bounds for Policy Iteration using Expert Prediction},
  booktitle    = {Proceedings of the 36th International Conference on Machine Learning,
                  {ICML} 2019, 9-15 June 2019, Long Beach, California, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {97},
  pages        = {3692--3702},
  publisher    = {{PMLR}},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v97/lazic19a.html},
  timestamp    = {Tue, 08 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/X19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/LiLS19,
  author = {Li, Shuai and
            Lattimore, Tor and
            Szepesv{\'{a}}ri, Csaba},
  editor = {Chaudhuri, Kamalika and
            Salakhutdinov, Ruslan},
  title = {Online Learning to Rank with Features},
  booktitle = {Proceedings of the 36th International Conference on
               Machine Learning, {ICML} 2019, 9-15 June 2019, Long Beach,
               California, {USA}},
  series = {Proceedings of Machine Learning Research},
  volume = {97},
  pages = {3856--3865},
  publisher = {{PMLR}},
  year = {2019},
  url = {http://proceedings.mlr.press/v97/li19f.html},
  timestamp = {Tue, 11 Jun 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/LiLS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The braces around CapsAndRuns preserve the camel-case algorithm name under sentence-casing styles.
@inproceedings{DBLP:conf/icml/WeiszGS19,
  author       = {Gell{\'{e}}rt Weisz and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Kamalika Chaudhuri and
                  Ruslan Salakhutdinov},
  title        = {{CapsAndRuns}: An Improved Method for Approximately Optimal Algorithm
                  Configuration},
  booktitle    = {Proceedings of the 36th International Conference on Machine Learning,
                  {ICML} 2019, 9-15 June 2019, Long Beach, California, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {97},
  pages        = {6707--6715},
  publisher    = {{PMLR}},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v97/weisz19a.html},
  timestamp    = {Tue, 19 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WeiszGS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ijcai/KvetonSGB19,
  author = {Kveton, Branislav and
            Szepesv{\'{a}}ri, Csaba and
            Ghavamzadeh, Mohammad and
            Boutilier, Craig},
  editor = {Kraus, Sarit},
  title = {Perturbed-History Exploration in Stochastic Multi-Armed Bandits},
  booktitle = {Proceedings of the Twenty-Eighth International Joint Conference
               on Artificial Intelligence, {IJCAI} 2019, Macao, China,
               August 10-16, 2019},
  pages = {2786--2793},
  publisher = {ijcai.org},
  year = {2019},
  url = {https://doi.org/10.24963/ijcai.2019/386},
  doi = {10.24963/IJCAI.2019/386},
  timestamp = {Tue, 20 Aug 2019 16:18:18 +0200},
  biburl = {https://dblp.org/rec/conf/ijcai/KvetonSGB19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/WerpachowskiGS19,
  author = {Werpachowski, Roman and
            Gy{\"{o}}rgy, Andr{\'{a}}s and
            Szepesv{\'{a}}ri, Csaba},
  editor = {Wallach, Hanna M. and
            Larochelle, Hugo and
            Beygelzimer, Alina and
            d'Alch{\'{e}}{-}Buc, Florence and
            Fox, Emily B. and
            Garnett, Roman},
  title = {Detecting Overfitting via Adversarial Examples},
  booktitle = {Advances in Neural Information Processing Systems 32:
               Annual Conference on Neural Information Processing Systems 2019,
               NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages = {7856--7866},
  year = {2019},
  url = {https://proceedings.neurips.cc/paper/2019/hash/28f7241796510e838db4a1384ae1279d-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/WerpachowskiGS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2019 conference paper.
% The title uses TeX quote ligatures (``...'') instead of straight double
% quotes, which LaTeX would typeset as two closing quotation marks.
@inproceedings{DBLP:conf/nips/JoulaniGS19,
  author       = {Joulani, Pooria and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Beygelzimer, Alina and
                  d'Alch{\'{e}}{-}Buc, Florence and
                  Fox, Emily B. and
                  Garnett, Roman},
  title        = {Think out of the ``Box'': Generically-Constrained Asynchronous
                  Composite Optimization and Hedging},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, NeurIPS 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {12225--12235},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/0224cd598e48c5041c7947fd5cb20d53-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/JoulaniGS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2019 conference paper.
% {BubbleRank} is braced so that sentence-casing bibliography styles keep the
% inner capital instead of printing "Bubblerank".
@inproceedings{DBLP:conf/uai/0003KLMRSZ19,
  author       = {Li, Chang and
                  Kveton, Branislav and
                  Lattimore, Tor and
                  Markov, Ilya and
                  de Rijke, Maarten and
                  Szepesv{\'{a}}ri, Csaba and
                  Zoghi, Masrour},
  editor       = {Globerson, Amir and
                  Silva, Ricardo},
  title        = {{BubbleRank}: Safe Online Learning to Re-Rank via Implicit Click Feedback},
  booktitle    = {Proceedings of the Thirty-Fifth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2019, Tel Aviv, Israel, July 22-25, 2019},
  series       = {Proceedings of Machine Learning Research},
  volume       = {115},
  pages        = {196--206},
  publisher    = {{AUAI} Press},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v115/li20b.html},
  timestamp    = {Tue, 15 Dec 2020 17:40:18 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/0003KLMRSZ19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2019 conference paper. The arXiv record with the same title and author
% list is DBLP:journals/corr/abs-1903-09132.
@inproceedings{DBLP:conf/uai/KvetonSGB19,
  author       = {Kveton, Branislav and
                  Szepesv{\'{a}}ri, Csaba and
                  Ghavamzadeh, Mohammad and
                  Boutilier, Craig},
  editor       = {Globerson, Amir and
                  Silva, Ricardo},
  title        = {Perturbed-History Exploration in Stochastic Linear Bandits},
  booktitle    = {Proceedings of the Thirty-Fifth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2019, Tel Aviv, Israel, July 22-25, 2019},
  series       = {Proceedings of Machine Learning Research},
  volume       = {115},
  pages        = {530--540},
  publisher    = {{AUAI} Press},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v115/kveton20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/KvetonSGB19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1901.04676 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1901-04676,
  author       = {Verma, Arun and
                  Hanawal, Manjesh Kumar and
                  Szepesv{\'{a}}ri, Csaba and
                  Saligrama, Venkatesh},
  title        = {Online Algorithm for Unsupervised Sensor Selection},
  journal      = {CoRR},
  volume       = {abs/1901.04676},
  year         = {2019},
  url          = {http://arxiv.org/abs/1901.04676},
  eprinttype   = {arXiv},
  eprint       = {1901.04676},
  timestamp    = {Fri, 09 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-04676.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1902.00470 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1902-00470,
  author       = {Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {An Information-Theoretic Approach to Minimax Regret in Partial Monitoring},
  journal      = {CoRR},
  volume       = {abs/1902.00470},
  year         = {2019},
  url          = {http://arxiv.org/abs/1902.00470},
  eprinttype   = {arXiv},
  eprint       = {1902.00470},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-00470.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1902.01846 (CoRR), 2019.
% {Gibbs-ERM} is braced: a proper noun plus an acronym that sentence-casing
% styles would otherwise lowercase.
@article{DBLP:journals/corr/abs-1902-01846,
  author       = {Kuzborskij, Ilja and
                  Cesa{-}Bianchi, Nicol{\`{o}} and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Distribution-Dependent Analysis of {Gibbs-ERM} Principle},
  journal      = {CoRR},
  volume       = {abs/1902.01846},
  year         = {2019},
  url          = {http://arxiv.org/abs/1902.01846},
  eprinttype   = {arXiv},
  eprint       = {1902.01846},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-01846.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1902.10089 (CoRR), 2019. Same title and author list as
% DBLP:conf/ijcai/KvetonSGB19.
@article{DBLP:journals/corr/abs-1902-10089,
  author       = {Kveton, Branislav and
                  Szepesv{\'{a}}ri, Csaba and
                  Ghavamzadeh, Mohammad and
                  Boutilier, Craig},
  title        = {Perturbed-History Exploration in Stochastic Multi-Armed Bandits},
  journal      = {CoRR},
  volume       = {abs/1902.10089},
  year         = {2019},
  url          = {http://arxiv.org/abs/1902.10089},
  eprinttype   = {arXiv},
  eprint       = {1902.10089},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-10089.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1903.02380 (CoRR), 2019. Same title and author list as
% DBLP:conf/nips/WerpachowskiGS19.
@article{DBLP:journals/corr/abs-1903-02380,
  author       = {Werpachowski, Roman and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Detecting Overfitting via Adversarial Examples},
  journal      = {CoRR},
  volume       = {abs/1903.02380},
  year         = {2019},
  url          = {http://arxiv.org/abs/1903.02380},
  eprinttype   = {arXiv},
  eprint       = {1903.02380},
  timestamp    = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-02380.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1903.05457 (CoRR), 2019.
% {Efron-Stein} (proper nouns) and {Lq} (a symbol) are braced so that
% sentence-casing styles do not lowercase them.
@article{DBLP:journals/corr/abs-1903-05457,
  author       = {Abou{-}Moustafa, Karim T. and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {An Exponential {Efron-Stein} Inequality for {Lq} Stable Learning Rules},
  journal      = {CoRR},
  volume       = {abs/1903.05457},
  year         = {2019},
  url          = {http://arxiv.org/abs/1903.05457},
  eprinttype   = {arXiv},
  eprint       = {1903.05457},
  timestamp    = {Tue, 02 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-05457.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1903.09132 (CoRR), 2019. Same title and author list as
% DBLP:conf/uai/KvetonSGB19.
@article{DBLP:journals/corr/abs-1903-09132,
  author       = {Kveton, Branislav and
                  Szepesv{\'{a}}ri, Csaba and
                  Ghavamzadeh, Mohammad and
                  Boutilier, Craig},
  title        = {Perturbed-History Exploration in Stochastic Linear Bandits},
  journal      = {CoRR},
  volume       = {abs/1903.09132},
  year         = {2019},
  url          = {http://arxiv.org/abs/1903.09132},
  eprinttype   = {arXiv},
  eprint       = {1903.09132},
  timestamp    = {Mon, 01 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-09132.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1904.02664 (CoRR), 2019.
% {Bayes} is braced: a proper noun that sentence-casing styles would lowercase.
@article{DBLP:journals/corr/abs-1904-02664,
  author       = {Hsu, Chih{-}Wei and
                  Kveton, Branislav and
                  Meshi, Ofer and
                  Mladenov, Martin and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Empirical {Bayes} Regret Minimization},
  journal      = {CoRR},
  volume       = {abs/1904.02664},
  year         = {2019},
  url          = {http://arxiv.org/abs/1904.02664},
  eprinttype   = {arXiv},
  eprint       = {1904.02664},
  timestamp    = {Wed, 24 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-02664.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1904.11608 (CoRR), 2019. Same title as DBLP:conf/icml/MaOSS18;
% the author order and the spelling of Olshevsky's given name differ there.
@article{DBLP:journals/corr/abs-1904-11608,
  author       = {Ma, Yao and
                  Olshevsky, Alex and
                  Saligrama, Venkatesh and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Gradient Descent for Sparse Rank-One Matrix Completion for Crowd-Sourced
                  Aggregation of Sparsely Interacting Workers},
  journal      = {CoRR},
  volume       = {abs/1904.11608},
  year         = {2019},
  url          = {http://arxiv.org/abs/1904.11608},
  eprinttype   = {arXiv},
  eprint       = {1904.11608},
  timestamp    = {Thu, 02 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-11608.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1906.08947 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1906-08947,
  author       = {Kveton, Branislav and
                  Zaheer, Manzil and
                  Szepesv{\'{a}}ri, Csaba and
                  Li, Lihong and
                  Ghavamzadeh, Mohammad and
                  Boutilier, Craig},
  title        = {Randomized Exploration in Generalized Linear Bandits},
  journal      = {CoRR},
  volume       = {abs/1906.08947},
  year         = {2019},
  url          = {http://arxiv.org/abs/1906.08947},
  eprinttype   = {arXiv},
  eprint       = {1906.08947},
  timestamp    = {Sun, 30 Jun 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-08947.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1907.05772 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1907-05772,
  author       = {Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Exploration by Optimisation in Partial Monitoring},
  journal      = {CoRR},
  volume       = {abs/1907.05772},
  year         = {2019},
  url          = {http://arxiv.org/abs/1907.05772},
  eprinttype   = {arXiv},
  eprint       = {1907.05772},
  timestamp    = {Wed, 17 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1907-05772.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1908.03568 (CoRR), 2019.
% Names are in "Last, First" form. "Van Roy, Benjamin" keeps the two-word
% surname together; written as "Benjamin Van Roy", BibTeX takes only "Roy" as
% the last name because the capitalised "Van" is not a von-particle.
@article{DBLP:journals/corr/abs-1908-03568,
  author       = {Osband, Ian and
                  Doron, Yotam and
                  Hessel, Matteo and
                  Aslanides, John and
                  Sezener, Eren and
                  Saraiva, Andre and
                  McKinney, Katrina and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba and
                  Singh, Satinder and
                  Van Roy, Benjamin and
                  Sutton, Richard S. and
                  Silver, David and
                  van Hasselt, Hado},
  title        = {Behaviour Suite for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1908.03568},
  year         = {2019},
  url          = {http://arxiv.org/abs/1908.03568},
  eprinttype   = {arXiv},
  eprint       = {1908.03568},
  timestamp    = {Mon, 15 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1908.07380 (CoRR), 2019.
% {PAC-Bayes} is braced so that sentence-casing styles do not print "Pac-bayes".
@article{DBLP:journals/corr/abs-1908-07380,
  author       = {Rivasplata, Omar and
                  Tankasali, Vikram M. and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {{PAC-Bayes} with Backprop},
  journal      = {CoRR},
  volume       = {abs/1908.07380},
  year         = {2019},
  url          = {http://arxiv.org/abs/1908.07380},
  eprinttype   = {arXiv},
  eprint       = {1908.07380},
  timestamp    = {Mon, 26 Aug 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-07380.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1908.10479 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1908-10479,
  author       = {Abbasi{-}Yadkori, Yasin and
                  Lazic, Nevena and
                  Szepesv{\'{a}}ri, Csaba and
                  Weisz, Gell{\'{e}}rt},
  title        = {Exploration-Enhanced {POLITEX}},
  journal      = {CoRR},
  volume       = {abs/1908.10479},
  year         = {2019},
  url          = {http://arxiv.org/abs/1908.10479},
  eprinttype   = {arXiv},
  eprint       = {1908.10479},
  timestamp    = {Thu, 29 Aug 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1908-10479.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1909.01931 (CoRR), 2019.
% {Efron-Stein} and {PAC-Bayesian} are braced so that sentence-casing styles
% keep the proper nouns and the acronym capitalised.
@article{DBLP:journals/corr/abs-1909-01931,
  author       = {Kuzborskij, Ilja and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {{Efron-Stein} {PAC-Bayesian} Inequalities},
  journal      = {CoRR},
  volume       = {abs/1909.01931},
  year         = {2019},
  url          = {http://arxiv.org/abs/1909.01931},
  eprinttype   = {arXiv},
  eprint       = {1909.01931},
  timestamp    = {Mon, 16 Sep 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1909-01931.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1910.06996 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1910-06996,
  author       = {Hao, Botao and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Adaptive Exploration in Linear Contextual Bandit},
  journal      = {CoRR},
  volume       = {abs/1910.06996},
  year         = {2019},
  url          = {http://arxiv.org/abs/1910.06996},
  eprinttype   = {arXiv},
  eprint       = {1910.06996},
  timestamp    = {Tue, 22 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-06996.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1910.08446 (CoRR), 2019.
% The acronym {CMPs} is braced so that sentence-casing styles do not print "cmps".
@article{DBLP:journals/corr/abs-1910-08446,
  author       = {Gajane, Pratik and
                  Ortner, Ronald and
                  Auer, Peter and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Autonomous exploration for navigating in non-stationary {CMPs}},
  journal      = {CoRR},
  volume       = {abs/1910.08446},
  year         = {2019},
  url          = {http://arxiv.org/abs/1910.08446},
  eprinttype   = {arXiv},
  eprint       = {1910.08446},
  timestamp    = {Tue, 22 Oct 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-08446.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1911.07676 (CoRR), 2019.
@article{DBLP:journals/corr/abs-1911-07676,
  author       = {Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Learning with Good Feature Representations in Bandits and in {RL}
                  with a Generative Model},
  journal      = {CoRR},
  volume       = {abs/1911.07676},
  year         = {2019},
  url          = {http://arxiv.org/abs/1911.07676},
  eprinttype   = {arXiv},
  eprint       = {1911.07676},
  timestamp    = {Wed, 04 Dec 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-07676.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Trans. Autom. Control 63(4), 2018.
% {Markov} is braced: a proper noun that sentence-casing styles would lowercase.
@article{DBLP:journals/tac/Lakshminarayanan18,
  author       = {Lakshminarayanan, Chandrashekar and
                  Bhatnagar, Shalabh and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {A Linearly Relaxed Approximate Linear Program for {Markov} Decision
                  Processes},
  journal      = {{IEEE} Trans. Autom. Control.},
  volume       = {63},
  number       = {4},
  pages        = {1185--1191},
  year         = {2018},
  url          = {https://doi.org/10.1109/TAC.2017.2743163},
  doi          = {10.1109/TAC.2017.2743163},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/Lakshminarayanan18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Trans. Autom. Control 63(9), 2018.
% NOTE(review): "Prashanth, L. A." treats Prashanth as the sorting name and
% "L. A." as initials; the "First Last" form made BibTeX take "A." as the
% surname. Confirm against the author's preferred citation form.
@article{DBLP:journals/tac/JieAFMS18,
  author       = {Jie, Cheng and
                  Prashanth, L. A. and
                  Fu, Michael C. and
                  Marcus, Steven I. and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Stochastic Optimization in a Cumulative Prospect Theory Framework},
  journal      = {{IEEE} Trans. Autom. Control.},
  volume       = {63},
  number       = {9},
  pages        = {2867--2882},
  year         = {2018},
  url          = {https://doi.org/10.1109/TAC.2018.2822658},
  doi          = {10.1109/TAC.2018.2822658},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/JieAFMS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2018 conference paper (PMLR volume 84).
@inproceedings{DBLP:conf/aistats/Lakshminarayanan18,
  author       = {Lakshminarayanan, Chandrashekar and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Storkey, Amos J. and
                  P{\'{e}}rez{-}Cruz, Fernando},
  title        = {Linear Stochastic Approximation: How Far Does Constant Step-Size and
                  Iterate Averaging Go?},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  {AISTATS} 2018, 9-11 April 2018, Playa Blanca, Lanzarote, Canary Islands,
                  Spain},
  series       = {Proceedings of Machine Learning Research},
  volume       = {84},
  pages        = {1347--1355},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v84/lakshminarayanan18a.html},
  timestamp    = {Wed, 03 Apr 2019 18:17:22 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/Lakshminarayanan18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2018 conference paper (PMLR volume 80). The arXiv record with the same
% title is DBLP:journals/corr/abs-1904-11608.
@inproceedings{DBLP:conf/icml/MaOSS18,
  author       = {Ma, Yao and
                  Olshevsky, Alexander and
                  Szepesv{\'{a}}ri, Csaba and
                  Saligrama, Venkatesh},
  editor       = {Dy, Jennifer G. and
                  Krause, Andreas},
  title        = {Gradient Descent for Sparse Rank-One Matrix Completion for Crowd-Sourced
                  Aggregation of Sparsely Interacting Workers},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {3341--3350},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/ma18b.html},
  timestamp    = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/MaOSS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2018 conference paper (PMLR volume 80).
@inproceedings{DBLP:conf/icml/Pike-Burke0SG18,
  author       = {Pike{-}Burke, Ciara and
                  Agrawal, Shipra and
                  Szepesv{\'{a}}ri, Csaba and
                  Gr{\"{u}}new{\"{a}}lder, Steffen},
  editor       = {Dy, Jennifer G. and
                  Krause, Andreas},
  title        = {Bandits with Delayed, Aggregated Anonymous Feedback},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {4102--4110},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/pike-burke18a.html},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/Pike-Burke0SG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2018 conference paper (PMLR volume 80). The arXiv record with the same
% author list is DBLP:journals/corr/abs-1807-00755.
@inproceedings{DBLP:conf/icml/WeiszGS18,
  author       = {Weisz, Gell{\'{e}}rt and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Dy, Jennifer G. and
                  Krause, Andreas},
  title        = {{LEAPSANDBOUNDS:} {A} Method for Approximately Optimal Algorithm Configuration},
  booktitle    = {Proceedings of the 35th International Conference on Machine Learning,
                  {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                  10-15, 2018},
  series       = {Proceedings of Machine Learning Research},
  volume       = {80},
  pages        = {5254--5262},
  publisher    = {{PMLR}},
  year         = {2018},
  url          = {http://proceedings.mlr.press/v80/weisz18a.html},
  timestamp    = {Tue, 19 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WeiszGS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ISAIM 2018 symposium paper.
% {Lq} is braced to keep the symbol's capital, and {Application} because it
% starts a new sentence inside the title; sentence-casing styles would
% lowercase both.
@inproceedings{DBLP:conf/isaim/Abou-MoustafaS18,
  author       = {Abou{-}Moustafa, Karim T. and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {An Exponential Tail Bound for {Lq} Stable Learning Rules. {Application}
                  to k-Folds Cross-Validation},
  booktitle    = {International Symposium on Artificial Intelligence and Mathematics,
                  {ISAIM} 2018, Fort Lauderdale, Florida, USA, January 3-5, 2018},
  year         = {2018},
  url          = {https://isaim2018.cs.ou.edu/papers/ISAIM2018\_Abou-Moustafa\_Szepesvari.pdf},
  timestamp    = {Wed, 20 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/isaim/Abou-MoustafaS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2018 conference paper. The arXiv record with the same title and author
% list is DBLP:journals/corr/abs-1806-02248.
% {TopRank} is braced so that sentence-casing styles do not print "Toprank".
@inproceedings{DBLP:conf/nips/LattimoreKLS18,
  author       = {Lattimore, Tor and
                  Kveton, Branislav and
                  Li, Shuai and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Bengio, Samy and
                  Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Grauman, Kristen and
                  Cesa{-}Bianchi, Nicol{\`{o}} and
                  Garnett, Roman},
  title        = {{TopRank}: {A} practical algorithm for online stochastic ranking},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {3949--3958},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/de03beffeed9da5f3639a621bcab5dd4-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/LattimoreKLS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2018 conference paper. The arXiv record with the same title is
% DBLP:journals/corr/abs-1806-06827 (author order differs there).
% {PAC-Bayes} is braced so that sentence-casing styles do not print "Pac-bayes".
@inproceedings{DBLP:conf/nips/RivasplataSSPS18,
  author       = {Rivasplata, Omar and
                  Szepesv{\'{a}}ri, Csaba and
                  Shawe{-}Taylor, John and
                  Parrado{-}Hern{\'{a}}ndez, Emilio and
                  Sun, Shiliang},
  editor       = {Bengio, Samy and
                  Wallach, Hanna M. and
                  Larochelle, Hugo and
                  Grauman, Kristen and
                  Cesa{-}Bianchi, Nicol{\`{o}} and
                  Garnett, Roman},
  title        = {{PAC-Bayes} bounds for stable algorithms with instance-dependent priors},
  booktitle    = {Advances in Neural Information Processing Systems 31: Annual Conference
                  on Neural Information Processing Systems 2018, NeurIPS 2018, December
                  3-8, 2018, Montr{\'{e}}al, Canada},
  pages        = {9234--9244},
  year         = {2018},
  url          = {https://proceedings.neurips.cc/paper/2018/hash/386854131f58a556343e056f03626e00-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/RivasplataSSPS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1804.06021 (CoRR), 2018.
@article{DBLP:journals/corr/abs-1804-06021,
  author       = {Abbasi{-}Yadkori, Yasin and
                  Lazic, Nevena and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Regret Bounds for Model-Free Linear Quadratic Control},
  journal      = {CoRR},
  volume       = {abs/1804.06021},
  year         = {2018},
  url          = {http://arxiv.org/abs/1804.06021},
  eprinttype   = {arXiv},
  eprint       = {1804.06021},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1804-06021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1805.09247 (CoRR), 2018.
@article{DBLP:journals/corr/abs-1805-09247,
  author       = {Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Cleaning up the neighborhood: {A} full classification for adversarial
                  partial monitoring},
  journal      = {CoRR},
  volume       = {abs/1805.09247},
  year         = {2018},
  url          = {http://arxiv.org/abs/1805.09247},
  eprinttype   = {arXiv},
  eprint       = {1805.09247},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1805-09247.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1806.02248 (CoRR), 2018. Same title and author list as
% DBLP:conf/nips/LattimoreKLS18.
% {TopRank} is braced so that sentence-casing styles do not print "Toprank".
@article{DBLP:journals/corr/abs-1806-02248,
  author       = {Lattimore, Tor and
                  Kveton, Branislav and
                  Li, Shuai and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {{TopRank}: {A} practical algorithm for online stochastic ranking},
  journal      = {CoRR},
  volume       = {abs/1806.02248},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.02248},
  eprinttype   = {arXiv},
  eprint       = {1806.02248},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-02248.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1806.05819 (CoRR), 2018.
% {BubbleRank} is braced so that sentence-casing styles do not print "Bubblerank".
@article{DBLP:journals/corr/abs-1806-05819,
  author       = {Kveton, Branislav and
                  Li, Chang and
                  Lattimore, Tor and
                  Markov, Ilya and
                  de Rijke, Maarten and
                  Szepesv{\'{a}}ri, Csaba and
                  Zoghi, Masrour},
  title        = {{BubbleRank}: Safe Online Learning to Rerank},
  journal      = {CoRR},
  volume       = {abs/1806.05819},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.05819},
  eprinttype   = {arXiv},
  eprint       = {1806.05819},
  timestamp    = {Wed, 09 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-05819.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1806.06827 (CoRR), 2018. Same title as
% DBLP:conf/nips/RivasplataSSPS18 (author order differs there).
% {PAC-Bayes} is braced so that sentence-casing styles do not print "Pac-bayes".
@article{DBLP:journals/corr/abs-1806-06827,
  author       = {Rivasplata, Omar and
                  Parrado{-}Hern{\'{a}}ndez, Emilio and
                  Shawe{-}Taylor, John and
                  Sun, Shiliang and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {{PAC-Bayes} bounds for stable algorithms with instance-dependent priors},
  journal      = {CoRR},
  volume       = {abs/1806.06827},
  year         = {2018},
  url          = {http://arxiv.org/abs/1806.06827},
  eprinttype   = {arXiv},
  eprint       = {1806.06827},
  timestamp    = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1806-06827.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1807.00755 (CoRR), 2018. Same author list as
% DBLP:conf/icml/WeiszGS18.
% {LeapsAndBounds} is braced so that sentence-casing styles keep the inner
% capitals instead of printing "Leapsandbounds".
@article{DBLP:journals/corr/abs-1807-00755,
  author       = {Weisz, Gell{\'{e}}rt and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {{LeapsAndBounds}: {A} Method for Approximately Optimal Algorithm Configuration},
  journal      = {CoRR},
  volume       = {abs/1807.00755},
  year         = {2018},
  url          = {http://arxiv.org/abs/1807.00755},
  eprinttype   = {arXiv},
  eprint       = {1807.00755},
  timestamp    = {Tue, 19 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1807-00755.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1810.02567 (CoRR), 2018.
@article{DBLP:journals/corr/abs-1810-02567,
  author       = {Li, Shuai and
                  Lattimore, Tor and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Online Learning to Rank with Features},
  journal      = {CoRR},
  volume       = {abs/1810.02567},
  year         = {2018},
  url          = {http://arxiv.org/abs/1810.02567},
  eprinttype   = {arXiv},
  eprint       = {1810.02567},
  timestamp    = {Thu, 01 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1810-02567.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1811.05154 (CoRR), 2018.
@article{DBLP:journals/corr/abs-1811-05154,
  author       = {Kveton, Branislav and
                  Szepesv{\'{a}}ri, Csaba and
                  Wen, Zheng and
                  Ghavamzadeh, Mohammad and
                  Lattimore, Tor},
  title        = {Garbage In, Reward Out: Bootstrapping Exploration in Multi-Armed Bandits},
  journal      = {CoRR},
  volume       = {abs/1811.05154},
  year         = {2018},
  url          = {http://arxiv.org/abs/1811.05154},
  eprinttype   = {arXiv},
  eprint       = {1811.05154},
  timestamp    = {Sat, 24 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-05154.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1812.01647 (CoRR), 2018.
@article{DBLP:journals/corr/abs-1812-01647,
  author       = {Uesato, Jonathan and
                  Kumar, Ananya and
                  Szepesv{\'{a}}ri, Csaba and
                  Erez, Tom and
                  Ruderman, Avraham and
                  Anderson, Keith and
                  Dvijotham, Krishnamurthy and
                  Heess, Nicolas and
                  Kohli, Pushmeet},
  title        = {Rigorous Agent Evaluation: An Adversarial Approach to Uncover Catastrophic
                  Failures},
  journal      = {CoRR},
  volume       = {abs/1812.01647},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.01647},
  eprinttype   = {arXiv},
  eprint       = {1812.01647},
  timestamp    = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-01647.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, J. Mach. Learn. Res. volume 18, 2017.
@article{DBLP:journals/jmlr/HuangLGS17,
  author       = {Huang, Ruitong and
                  Lattimore, Tor and
                  Gy{\"{o}}rgy, Andr{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba},
  title        = {Following the Leader and Fast Rates in Online Linear Prediction: Curved
                  Constraint Sets and Other Regularities},
  journal      = {J. Mach. Learn. Res.},
  volume       = {18},
  pages        = {145:1--145:31},
  year         = {2017},
  url          = {http://jmlr.org/papers/v18/17-079.html},
  timestamp    = {Tue, 19 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/HuangLGS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2017 conference paper (PMLR volume 54).
@inproceedings{DBLP:conf/aistats/KatariyaKSVW17,
  author       = {Katariya, Sumeet and
                  Kveton, Branislav and
                  Szepesv{\'{a}}ri, Csaba and
                  Vernade, Claire and
                  Wen, Zheng},
  editor       = {Singh, Aarti and
                  Zhu, Xiaojin (Jerry)},
  title        = {Stochastic Rank-1 Bandits},
  booktitle    = {Proceedings of the 20th International Conference on Artificial Intelligence
                  and Statistics, {AISTATS} 2017, 20-22 April 2017, Fort Lauderdale,
                  FL, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {54},
  pages        = {392--401},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v54/katariya17a.html},
  timestamp    = {Wed, 29 May 2019 08:41:44 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/KatariyaKSVW17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/LattimoreS17,
  author       = {Tor Lattimore and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Aarti Singh and
                  Xiaojin (Jerry) Zhu},
  title        = {The End of Optimism? {An} Asymptotic Analysis of Finite-Armed Linear
                  Bandits},
  booktitle    = {Proceedings of the 20th International Conference on Artificial Intelligence
                  and Statistics, {AISTATS} 2017, 20-22 April 2017, Fort Lauderdale,
                  FL, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {54},
  pages        = {728--737},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v54/lattimore17a.html},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/LattimoreS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aistats/HanawalSS17,
  author    = {Manjesh Kumar Hanawal and
               Csaba Szepesv{\'{a}}ri and
               Venkatesh Saligrama},
  editor    = {Aarti Singh and
               Xiaojin (Jerry) Zhu},
  title     = {Unsupervised Sequential Sensor Acquisition},
  booktitle = {Proceedings of the 20th International Conference on Artificial Intelligence and Statistics, {AISTATS} 2017, 20-22 April 2017, Fort Lauderdale, FL, {USA}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {54},
  pages     = {803--811},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v54/hanawal17a.html},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/HanawalSS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/alt/HuangAS017,
  author    = {Ruitong Huang and
               Mohammad M. Ajallooeian and
               Csaba Szepesv{\'{a}}ri and
               Martin M{\"{u}}ller},
  editor    = {Steve Hanneke and
               Lev Reyzin},
  title     = {Structured Best Arm Identification with Fixed Confidence},
  booktitle = {International Conference on Algorithmic Learning Theory, {ALT} 2017, 15-17 October 2017, Kyoto University, Kyoto, Japan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {76},
  pages     = {593--616},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v76/huang17a.html},
  timestamp = {Wed, 03 Apr 2019 18:17:23 +0200},
  biburl    = {https://dblp.org/rec/conf/alt/HuangAS017.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/alt/JoulaniGS17,
  author    = {Pooria Joulani and
               Andr{\'{a}}s Gy{\"{o}}rgy and
               Csaba Szepesv{\'{a}}ri},
  editor    = {Steve Hanneke and
               Lev Reyzin},
  title     = {A Modular Analysis of Adaptive (Non-)Convex Optimization: Optimism, Composite Objectives, and Variational Bounds},
  booktitle = {International Conference on Algorithmic Learning Theory, {ALT} 2017, 15-17 October 2017, Kyoto University, Kyoto, Japan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {76},
  pages     = {681--720},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v76/joulani17a.html},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/alt/JoulaniGS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZoghiTGKSW17,
  author    = {Masrour Zoghi and
               Tom{\'{a}}s Tunys and
               Mohammad Ghavamzadeh and
               Branislav Kveton and
               Csaba Szepesv{\'{a}}ri and
               Zheng Wen},
  editor    = {Doina Precup and
               Yee Whye Teh},
  title     = {Online Learning to Rank in Stochastic Click Models},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {4199--4208},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v70/zoghi17a.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/ZoghiTGKSW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcai/KatariyaKSVW17,
  author    = {Sumeet Katariya and
               Branislav Kveton and
               Csaba Szepesv{\'{a}}ri and
               Claire Vernade and
               Zheng Wen},
  editor    = {Carles Sierra},
  title     = {Bernoulli Rank-1 Bandits for Click Feedback},
  booktitle = {Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August 19-25, 2017},
  pages     = {2001--2007},
  publisher = {ijcai.org},
  year      = {2017},
  doi       = {10.24963/IJCAI.2017/278},
  url       = {https://doi.org/10.24963/ijcai.2017/278},
  timestamp = {Tue, 20 Aug 2019 16:16:54 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/KatariyaKSVW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/KaramiWSS17,
  author    = {Mahdi Karami and
               Martha White and
               Dale Schuurmans and
               Csaba Szepesv{\'{a}}ri},
  editor    = {Isabelle Guyon and
               Ulrike von Luxburg and
               Samy Bengio and
               Hanna M. Wallach and
               Rob Fergus and
               S. V. N. Vishwanathan and
               Roman Garnett},
  title     = {Multi-view Matrix Factorization for Linear Dynamical System Estimation},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages     = {7092--7101},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/c2964caac096f26db222cb325aa267cb-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 13:58:27 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/KaramiWSS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/HuangLGS17,
  author     = {Ruitong Huang and
                Tor Lattimore and
                Andr{\'{a}}s Gy{\"{o}}rgy and
                Csaba Szepesv{\'{a}}ri},
  title      = {Following the Leader and Fast Rates in Linear Prediction: Curved Constraint Sets and Other Regularities},
  journal    = {CoRR},
  volume     = {abs/1702.03040},
  year       = {2017},
  url        = {http://arxiv.org/abs/1702.03040},
  eprinttype = {arXiv},
  eprint     = {1702.03040},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HuangLGS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/GhavamzadehKSTW17,
  author     = {Mohammad Ghavamzadeh and
                Branislav Kveton and
                Csaba Szepesv{\'{a}}ri and
                Tom{\'{a}}s Tunys and
                Zheng Wen and
                Masrour Zoghi},
  title      = {Online Learning to Rank in Stochastic Click Models},
  journal    = {CoRR},
  volume     = {abs/1703.02527},
  year       = {2017},
  url        = {http://arxiv.org/abs/1703.02527},
  eprinttype = {arXiv},
  eprint     = {1703.02527},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/GhavamzadehKSTW17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/KatariyaKSVW17,
  author     = {Sumeet Katariya and
                Branislav Kveton and
                Csaba Szepesv{\'{a}}ri and
                Claire Vernade and
                Zheng Wen},
  title      = {Bernoulli Rank-1 Bandits for Click Feedback},
  journal    = {CoRR},
  volume     = {abs/1703.06513},
  year       = {2017},
  url        = {http://arxiv.org/abs/1703.06513},
  eprinttype = {arXiv},
  eprint     = {1703.06513},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KatariyaKSVW17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/Lakshminarayanan17,
  author       = {Chandrashekar Lakshminarayanan and
                  Shalabh Bhatnagar and
                  Csaba Szepesv{\'{a}}ri},
  title        = {A Linearly Relaxed Approximate Linear Program for {Markov} Decision
                  Processes},
  journal      = {CoRR},
  volume       = {abs/1704.02544},
  year         = {2017},
  url          = {http://arxiv.org/abs/1704.02544},
  eprinttype   = {arXiv},
  eprint       = {1704.02544},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/Lakshminarayanan17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HuangASM17,
  author     = {Ruitong Huang and
                Mohammad M. Ajallooeian and
                Csaba Szepesv{\'{a}}ri and
                Martin M{\"{u}}ller},
  title      = {Structured Best Arm Identification with Fixed Confidence},
  journal    = {CoRR},
  volume     = {abs/1706.05198},
  year       = {2017},
  url        = {http://arxiv.org/abs/1706.05198},
  eprinttype = {arXiv},
  eprint     = {1706.05198},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HuangASM17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/Abou-MoustafaS17,
  author     = {Karim T. Abou{-}Moustafa and
                Csaba Szepesv{\'{a}}ri},
  title      = {An a Priori Exponential Tail Bound for k-Folds Cross-Validation},
  journal    = {CoRR},
  volume     = {abs/1706.05801},
  year       = {2017},
  url        = {http://arxiv.org/abs/1706.05801},
  eprinttype = {arXiv},
  eprint     = {1706.05801},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/Abou-MoustafaS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/MaOSS17,
  author     = {Yao Ma and
                Alex Olshevsky and
                Venkatesh Saligrama and
                Csaba Szepesv{\'{a}}ri},
  title      = {Crowdsourcing with Sparsely Interacting Workers},
  journal    = {CoRR},
  volume     = {abs/1706.06660},
  year       = {2017},
  url        = {http://arxiv.org/abs/1706.06660},
  eprinttype = {arXiv},
  eprint     = {1706.06660},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/MaOSS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1708-07367,
  author       = {Daniel J. Hsu and
                  Aryeh Kontorovich and
                  David A. Levin and
                  Yuval Peres and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Mixing time estimation in reversible {Markov} chains from a single sample
                  path},
  journal      = {CoRR},
  volume       = {abs/1708.07367},
  year         = {2017},
  url          = {http://arxiv.org/abs/1708.07367},
  eprinttype   = {arXiv},
  eprint       = {1708.07367},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-07367.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1709-02726,
  author     = {Pooria Joulani and
                Andr{\'{a}}s Gy{\"{o}}rgy and
                Csaba Szepesv{\'{a}}ri},
  title      = {A Modular Analysis of Adaptive (Non-)Convex Optimization: Optimism, Composite Objectives, and Variational Bounds},
  journal    = {CoRR},
  volume     = {abs/1709.02726},
  year       = {2017},
  url        = {http://arxiv.org/abs/1709.02726},
  eprinttype = {arXiv},
  eprint     = {1709.02726},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1709-02726.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1709-04073,
  author     = {Chandrashekar Lakshminarayanan and
                Csaba Szepesv{\'{a}}ri},
  title      = {Linear Stochastic Approximation: Constant Step-Size and Iterate Averaging},
  journal    = {CoRR},
  volume     = {abs/1709.04073},
  year       = {2017},
  url        = {http://arxiv.org/abs/1709.04073},
  eprinttype = {arXiv},
  eprint     = {1709.04073},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1709-04073.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1709-06853,
  author     = {Ciara Pike{-}Burke and
                Shipra Agrawal and
                Csaba Szepesv{\'{a}}ri and
                Steffen Gr{\"{u}}new{\"{a}}lder},
  title      = {Bandits with Delayed Anonymous Feedback},
  journal    = {CoRR},
  volume     = {abs/1709.06853},
  year       = {2017},
  url        = {http://arxiv.org/abs/1709.06853},
  eprinttype = {arXiv},
  eprint     = {1709.06853},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1709-06853.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1712-04644,
  author     = {Branislav Kveton and
                Csaba Szepesv{\'{a}}ri and
                Anup Rao and
                Zheng Wen and
                Yasin Abbasi{-}Yadkori and
                S. Muthukrishnan},
  title      = {Stochastic Low-Rank Bandits},
  journal    = {CoRR},
  volume     = {abs/1712.04644},
  year       = {2017},
  url        = {http://arxiv.org/abs/1712.04644},
  eprinttype = {arXiv},
  eprint     = {1712.04644},
  timestamp  = {Mon, 10 May 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1712-04644.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/jmlr/FarahmandGSM16,
  author    = {Amir{-}massoud Farahmand and
               Mohammad Ghavamzadeh and
               Csaba Szepesv{\'{a}}ri and
               Shie Mannor},
  title     = {Regularized Policy Iteration with Nonparametric Function Spaces},
  journal   = {J. Mach. Learn. Res.},
  volume    = {17},
  pages     = {139:1--139:66},
  year      = {2016},
  url       = {http://jmlr.org/papers/v17/13-016.html},
  timestamp = {Wed, 10 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/FarahmandGSM16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/JoulaniGS16,
  author    = {Pooria Joulani and
               Andr{\'{a}}s Gy{\"{o}}rgy and
               Csaba Szepesv{\'{a}}ri},
  editor    = {Dale Schuurmans and
               Michael P. Wellman},
  title     = {Delay-Tolerant Online Convex Optimization: Unified Analysis and Adaptive-Gradient Algorithms},
  booktitle = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence, February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages     = {1744--1750},
  publisher = {{AAAI} Press},
  year      = {2016},
  doi       = {10.1609/AAAI.V30I1.10320},
  url       = {https://doi.org/10.1609/aaai.v30i1.10320},
  timestamp = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/JoulaniGS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/LeverSSS16,
  author    = {Guy Lever and
               John Shawe{-}Taylor and
               Ronnie Stafford and
               Csaba Szepesv{\'{a}}ri},
  editor    = {Dale Schuurmans and
               Michael P. Wellman},
  title     = {Compressed Conditional Mean Embeddings for Model-Based Reinforcement Learning},
  booktitle = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence, February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages     = {1779--1787},
  publisher = {{AAAI} Press},
  year      = {2016},
  doi       = {10.1609/AAAI.V30I1.10304},
  url       = {https://doi.org/10.1609/aaai.v30i1.10304},
  timestamp = {Mon, 04 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/LeverSSS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/HuAGS16,
  author       = {Xiaowei Hu and
                  Prashanth L. A. and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Arthur Gretton and
                  Christian C. Robert},
  title        = {({Bandit}) Convex Optimization with Biased Noisy Gradient Oracles},
  booktitle    = {Proceedings of the 19th International Conference on Artificial Intelligence
                  and Statistics, {AISTATS} 2016, Cadiz, Spain, May 9-11, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {51},
  pages        = {819--828},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v51/hu16b.html},
  timestamp    = {Tue, 15 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/aistats/HuAGS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/KatariyaKSW16,
  author       = {Sumeet Katariya and
                  Branislav Kveton and
                  Csaba Szepesv{\'{a}}ri and
                  Zheng Wen},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {{DCM} Bandits: Learning to Rank with Multiple Clicks},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, {USA}, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1215--1224},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/katariya16.html},
  timestamp    = {Wed, 29 May 2019 08:41:46 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/KatariyaKSW16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/WuSLS16,
  author       = {Yifan Wu and
                  Roshan Shariff and
                  Tor Lattimore and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Conservative Bandits},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, {USA}, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1254--1262},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/wu16.html},
  timestamp    = {Wed, 29 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WuSLS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/AJFMS16,
  author       = {Prashanth L. A. and
                  Cheng Jie and
                  Michael C. Fu and
                  Steven I. Marcus and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Cumulative Prospect Theory Meets Reinforcement Learning: Prediction
                  and Control},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, {USA}, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1406--1415},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/la16.html},
  timestamp    = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/AJFMS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/GyorgyS16,
  author       = {Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Shifting Regret, Mirror Descent, and Matrices},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, {USA}, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {2943--2951},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/gyorgy16.html},
  timestamp    = {Tue, 19 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/GyorgyS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HuangLGS16,
  author    = {Ruitong Huang and
               Tor Lattimore and
               Andr{\'{a}}s Gy{\"{o}}rgy and
               Csaba Szepesv{\'{a}}ri},
  editor    = {Daniel D. Lee and
               Masashi Sugiyama and
               Ulrike von Luxburg and
               Isabelle Guyon and
               Roman Garnett},
  title     = {Following the Leader and Fast Rates in Linear Prediction: Curved Constraint Sets and Other Regularities},
  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5-10, 2016, Barcelona, Spain},
  pages     = {4970--4978},
  year      = {2016},
  url       = {https://proceedings.neurips.cc/paper/2016/hash/55a988dfb00a914717b3000a3374694c-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/HuangLGS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ShaloudegiGSX16,
  author    = {Kiarash Shaloudegi and
               Andr{\'{a}}s Gy{\"{o}}rgy and
               Csaba Szepesv{\'{a}}ri and
               Wilsun Xu},
  editor    = {Daniel D. Lee and
               Masashi Sugiyama and
               Ulrike von Luxburg and
               Isabelle Guyon and
               Roman Garnett},
  title     = {{SDP} Relaxation with Randomized Rounding for Energy Disaggregation},
  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5-10, 2016, Barcelona, Spain},
  pages     = {4979--4987},
  year      = {2016},
  url       = {https://proceedings.neurips.cc/paper/2016/hash/4de754248c196c85ee4fbdcee89179bd-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/ShaloudegiGSX16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/KatariyaKSW16,
  author     = {Sumeet Katariya and
                Branislav Kveton and
                Csaba Szepesv{\'{a}}ri and
                Zheng Wen},
  title      = {{DCM} Bandits: Learning to Rank with Multiple Clicks},
  journal    = {CoRR},
  volume     = {abs/1602.03146},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.03146},
  eprinttype = {arXiv},
  eprint     = {1602.03146},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KatariyaKSW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/WuSLS16,
  author     = {Yifan Wu and
                Roshan Shariff and
                Tor Lattimore and
                Csaba Szepesv{\'{a}}ri},
  title      = {Conservative Bandits},
  journal    = {CoRR},
  volume     = {abs/1602.04282},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.04282},
  eprinttype = {arXiv},
  eprint     = {1602.04282},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/WuSLS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/PiresS16,
  author     = {Bernardo {\'{A}}vila Pires and
                Csaba Szepesv{\'{a}}ri},
  title      = {Policy Error Bounds for Model-Based Reinforcement Learning with Factored Linear Models},
  journal    = {CoRR},
  volume     = {abs/1602.06346},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.06346},
  eprinttype = {arXiv},
  eprint     = {1602.06346},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PiresS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/KatariyaKSVW16,
  author     = {Sumeet Katariya and
                Branislav Kveton and
                Csaba Szepesv{\'{a}}ri and
                Claire Vernade and
                Zheng Wen},
  title      = {Stochastic Rank-1 Bandits},
  journal    = {CoRR},
  volume     = {abs/1608.03023},
  year       = {2016},
  url        = {http://arxiv.org/abs/1608.03023},
  eprinttype = {arXiv},
  eprint     = {1608.03023},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KatariyaKSVW16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/BalazsGS16,
  author     = {G{\'{a}}bor Bal{\'{a}}zs and
                Andr{\'{a}}s Gy{\"{o}}rgy and
                Csaba Szepesv{\'{a}}ri},
  title      = {Chaining Bounds for Empirical Risk Minimization},
  journal    = {CoRR},
  volume     = {abs/1609.01872},
  year       = {2016},
  url        = {http://arxiv.org/abs/1609.01872},
  eprinttype = {arXiv},
  eprint     = {1609.01872},
  timestamp  = {Mon, 25 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/BalazsGS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/BalazsGS16a,
  author     = {G{\'{a}}bor Bal{\'{a}}zs and
                Andr{\'{a}}s Gy{\"{o}}rgy and
                Csaba Szepesv{\'{a}}ri},
  title      = {Max-affine estimators for convex stochastic programming},
  journal    = {CoRR},
  volume     = {abs/1609.06331},
  year       = {2016},
  url        = {http://arxiv.org/abs/1609.06331},
  eprinttype = {arXiv},
  eprint     = {1609.06331},
  timestamp  = {Mon, 25 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/BalazsGS16a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/PiresS16a,
  author     = {Bernardo {\'{A}}vila Pires and
                Csaba Szepesv{\'{a}}ri},
  title      = {Multiclass Classification Calibration Functions},
  journal    = {CoRR},
  volume     = {abs/1609.06385},
  year       = {2016},
  url        = {http://arxiv.org/abs/1609.06385},
  eprinttype = {arXiv},
  eprint     = {1609.06385},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/PiresS16a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/HuAGS16,
  author       = {Xiaowei Hu and
                  Prashanth L. A. and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  title        = {({Bandit}) Convex Optimization with Biased Noisy Gradient Oracles},
  journal      = {CoRR},
  volume       = {abs/1609.07087},
  year         = {2016},
  url          = {http://arxiv.org/abs/1609.07087},
  eprinttype   = {arXiv},
  eprint       = {1609.07087},
  timestamp    = {Tue, 15 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HuAGS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/LattimoreS16,
  author       = {Tor Lattimore and
                  Csaba Szepesv{\'{a}}ri},
  title        = {The End of Optimism? {An} Asymptotic Analysis of Finite-Armed Linear
                  Bandits},
  journal      = {CoRR},
  volume       = {abs/1610.04491},
  year         = {2016},
  url          = {http://arxiv.org/abs/1610.04491},
  eprinttype   = {arXiv},
  eprint       = {1610.04491},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/LattimoreS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HanawalSS16,
  author     = {Manjesh Kumar Hanawal and
                Csaba Szepesv{\'{a}}ri and
                Venkatesh Saligrama},
  title      = {Sequential Learning without Feedback},
  journal    = {CoRR},
  volume     = {abs/1610.05394},
  year       = {2016},
  url        = {http://arxiv.org/abs/1610.05394},
  eprinttype = {arXiv},
  eprint     = {1610.05394},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HanawalSS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/ShaloudegiGSX16,
  author     = {Kiarash Shaloudegi and
                Andr{\'{a}}s Gy{\"{o}}rgy and
                Csaba Szepesv{\'{a}}ri and
                Wilsun Xu},
  title      = {{SDP} Relaxation with Randomized Rounding for Energy Disaggregation},
  journal    = {CoRR},
  volume     = {abs/1610.09491},
  year       = {2016},
  url        = {http://arxiv.org/abs/1610.09491},
  eprinttype = {arXiv},
  eprint     = {1610.09491},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/ShaloudegiGSX16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/BardNSB15,
  author     = {Bard, Nolan and
                Nicholas, Deon and
                Szepesv{\'{a}}ri, Csaba and
                Bowling, Michael},
  editor     = {Ganzfried, Sam},
  title      = {Decision-Theoretic Clustering of Strategies},
  booktitle  = {Computer Poker and Imperfect Information, Papers from the 2015 {AAAI}
                Workshop, Austin, Texas, USA, January 26, 2015},
  series     = {{AAAI} Technical Report},
  volume     = {{WS-15-07}},
  publisher  = {{AAAI} Press},
  year       = {2015},
  url        = {http://aaai.org/ocs/index.php/WS/AAAIW15/paper/view/10201},
  timestamp  = {Tue, 05 Sep 2023 08:59:27 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/BardNSB15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/PiresS15,
  author     = {Pires, Bernardo {\'{A}}vila and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Bowling, Michael and
                Bellemare, Marc G. and
                Talvitie, Erik and
                Veness, Joel and
                Machado, Marlos C.},
  title      = {Pathological Effects of Variance on Classification-Based Policy Iteration},
  booktitle  = {Learning for General Competency in Video Games, Papers from the 2015
                {AAAI} Workshop, Austin, Texas, USA, January 26, 2015},
  series     = {{AAAI} Technical Report},
  volume     = {{WS-15-10}},
  publisher  = {{AAAI} Press},
  year       = {2015},
  url        = {http://aaai.org/ocs/index.php/WS/AAAIW15/paper/view/10098},
  timestamp  = {Tue, 05 Sep 2023 08:59:27 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/PiresS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/BalazsGS15,
  author     = {Bal{\'{a}}zs, G{\'{a}}bor and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Lebanon, Guy and
                Vishwanathan, S. V. N.},
  title      = {Near-optimal max-affine estimators for convex regression},
  booktitle  = {Proceedings of the Eighteenth International Conference on Artificial
                Intelligence and Statistics, {AISTATS} 2015, San Diego, California,
                USA, May 9-12, 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {38},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v38/balazs15.html},
  timestamp  = {Mon, 25 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/aistats/BalazsGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/KvetonWAS15,
  author     = {Kveton, Branislav and
                Wen, Zheng and
                Ashkan, Azin and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Lebanon, Guy and
                Vishwanathan, S. V. N.},
  title      = {Tight Regret Bounds for Stochastic Combinatorial Semi-Bandits},
  booktitle  = {Proceedings of the Eighteenth International Conference on Artificial
                Intelligence and Statistics, {AISTATS} 2015, San Diego, California,
                USA, May 9-12, 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {38},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v38/kveton15.html},
  timestamp  = {Wed, 29 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/KvetonWAS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/LiMS15,
  author     = {Li, Lihong and
                Munos, R{\'{e}}mi and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Lebanon, Guy and
                Vishwanathan, S. V. N.},
  title      = {Toward Minimax Off-policy Value Estimation},
  booktitle  = {Proceedings of the Eighteenth International Conference on Artificial
                Intelligence and Statistics, {AISTATS} 2015, San Diego, California,
                USA, May 9-12, 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {38},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v38/li15b.html},
  timestamp  = {Wed, 29 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/LiMS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/ShariffGS15,
  author     = {Shariff, Roshan and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Lebanon, Guy and
                Vishwanathan, S. V. N.},
  title      = {Exploiting Symmetries to Construct Efficient {MCMC} Algorithms With
                an Application to {SLAM}},
  booktitle  = {Proceedings of the Eighteenth International Conference on Artificial
                Intelligence and Statistics, {AISTATS} 2015, San Diego, California,
                USA, May 9-12, 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {38},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v38/shariff15.html},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/ShariffGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/atal/BardNSB15,
  author     = {Bard, Nolan and
                Nicholas, Deon and
                Szepesv{\'{a}}ri, Csaba and
                Bowling, Michael H.},
  editor     = {Weiss, Gerhard and
                Yolum, Pinar and
                Bordini, Rafael H. and
                Elkind, Edith},
  title      = {Decision-theoretic Clustering of Strategies},
  booktitle  = {Proceedings of the 2015 International Conference on Autonomous Agents
                and Multiagent Systems, {AAMAS} 2015, Istanbul, Turkey, May 4-8, 2015},
  pages      = {17--25},
  publisher  = {{ACM}},
  year       = {2015},
  url        = {http://dl.acm.org/citation.cfm?id=2772886},
  timestamp  = {Tue, 08 Mar 2022 10:12:47 +0100},
  biburl     = {https://dblp.org/rec/conf/atal/BardNSB15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/KvetonSWA15,
  author     = {Kveton, Branislav and
                Szepesv{\'{a}}ri, Csaba and
                Wen, Zheng and
                Ashkan, Azin},
  editor     = {Bach, Francis R. and
                Blei, David M.},
  title      = {Cascading Bandits: Learning to Rank in the Cascade Model},
  booktitle  = {Proceedings of the 32nd International Conference on Machine Learning,
                {ICML} 2015, Lille, France, 6-11 July 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {37},
  pages      = {767--776},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v37/kveton15.html},
  timestamp  = {Wed, 29 May 2019 08:41:45 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/KvetonSWA15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/WuGS15,
  author     = {Wu, Yifan and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Bach, Francis R. and
                Blei, David M.},
  title      = {On Identifying Good Options under Combinatorially Structured Feedback
                in Finite Noisy Environments},
  booktitle  = {Proceedings of the 32nd International Conference on Machine Learning,
                {ICML} 2015, Lille, France, 6-11 July 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {37},
  pages      = {1283--1291},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v37/wub15.html},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/WuGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/HuangGS15,
  author     = {Huang, Ruitong and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Bach, Francis R. and
                Blei, David M.},
  title      = {Deterministic Independent Component Analysis},
  booktitle  = {Proceedings of the 32nd International Conference on Machine Learning,
                {ICML} 2015, Lille, France, 6-11 July 2015},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {37},
  pages      = {2521--2530},
  publisher  = {JMLR.org},
  year       = {2015},
  url        = {http://proceedings.mlr.press/v37/huangb15.html},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/HuangGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ijcai/JoulaniGS15,
  author     = {Joulani, Pooria and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Yang, Qiang and
                Wooldridge, Michael J.},
  title      = {Fast Cross-Validation for Incremental Learning},
  booktitle  = {Proceedings of the Twenty-Fourth International Joint Conference on
                Artificial Intelligence, {IJCAI} 2015, Buenos Aires, Argentina, July
                25-31, 2015},
  pages      = {3597--3604},
  publisher  = {{AAAI} Press},
  year       = {2015},
  url        = {http://ijcai.org/Abstract/15/506},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ijcai/JoulaniGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/LattimoreCS15,
  author     = {Lattimore, Tor and
                Crammer, Koby and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Cortes, Corinna and
                Lawrence, Neil D. and
                Lee, Daniel D. and
                Sugiyama, Masashi and
                Garnett, Roman},
  title      = {Linear Multi-Resource Allocation with Semi-Bandit Feedback},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual Conference
                on Neural Information Processing Systems 2015, December 7-12, 2015,
                Montreal, Quebec, Canada},
  pages      = {964--972},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/851ddf5058cf22df63d3344ad89919cf-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/LattimoreCS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Gaussian} is braced so the proper noun keeps its capital under
% sentence-casing bibliography styles.
@inproceedings{DBLP:conf/nips/WuGS15,
  author     = {Wu, Yifan and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Cortes, Corinna and
                Lawrence, Neil D. and
                Lee, Daniel D. and
                Sugiyama, Masashi and
                Garnett, Roman},
  title      = {Online Learning with {Gaussian} Payoffs and Side Observations},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual Conference
                on Neural Information Processing Systems 2015, December 7-12, 2015,
                Montreal, Quebec, Canada},
  pages      = {1360--1368},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/8e82ab7243b7c66d768f1b8ce1c967eb-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/WuGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/KvetonWAS15,
  author     = {Kveton, Branislav and
                Wen, Zheng and
                Ashkan, Azin and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Cortes, Corinna and
                Lawrence, Neil D. and
                Lee, Daniel D. and
                Sugiyama, Masashi and
                Garnett, Roman},
  title      = {Combinatorial Cascading Bandits},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual Conference
                on Neural Information Processing Systems 2015, December 7-12, 2015,
                Montreal, Quebec, Canada},
  pages      = {1450--1458},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/1f50893f80d6830d62765ffad7721742-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/KvetonWAS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Markov} is braced so the proper noun keeps its capital under
% sentence-casing bibliography styles.
@inproceedings{DBLP:conf/nips/HsuKS15,
  author     = {Hsu, Daniel J. and
                Kontorovich, Aryeh and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Cortes, Corinna and
                Lawrence, Neil D. and
                Lee, Daniel D. and
                Sugiyama, Masashi and
                Garnett, Roman},
  title      = {Mixing Time Estimation in Reversible {Markov} Chains from a Single Sample
                Path},
  booktitle  = {Advances in Neural Information Processing Systems 28: Annual Conference
                on Neural Information Processing Systems 2015, December 7-12, 2015,
                Montreal, Quebec, Canada},
  pages      = {1459--1467},
  year       = {2015},
  url        = {https://proceedings.neurips.cc/paper/2015/hash/7ce3284b743aefde80ffd9aec500e085-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/HsuKS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/uai/Abbasi-YadkoriS15,
  author     = {Abbasi{-}Yadkori, Yasin and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Meila, Marina and
                Heskes, Tom},
  title      = {Bayesian Optimal Control of Smoothly Parameterized Systems},
  booktitle  = {Proceedings of the Thirty-First Conference on Uncertainty in Artificial
                Intelligence, {UAI} 2015, July 12-16, 2015, Amsterdam, The Netherlands},
  pages      = {1--11},
  publisher  = {{AUAI} Press},
  year       = {2015},
  url        = {http://auai.org/uai2015/proceedings/papers/271.pdf},
  timestamp  = {Thu, 12 Mar 2020 11:31:09 +0100},
  biburl     = {https://dblp.org/rec/conf/uai/Abbasi-YadkoriS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/KvetonSWA15,
  author     = {Kveton, Branislav and
                Szepesv{\'{a}}ri, Csaba and
                Wen, Zheng and
                Ashkan, Azin},
  title      = {Cascading Bandits},
  journal    = {CoRR},
  volume     = {abs/1502.02763},
  year       = {2015},
  url        = {http://arxiv.org/abs/1502.02763},
  eprinttype = {arXiv},
  eprint     = {1502.02763},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KvetonSWA15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Markov} is braced so the proper noun keeps its capital under
% sentence-casing bibliography styles.
@article{DBLP:journals/corr/HsuKS15,
  author     = {Hsu, Daniel J. and
                Kontorovich, Aryeh and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Mixing Time Estimation in Reversible {Markov} Chains from a Single Sample
                Path},
  journal    = {CoRR},
  volume     = {abs/1506.02903},
  year       = {2015},
  url        = {http://arxiv.org/abs/1506.02903},
  eprinttype = {arXiv},
  eprint     = {1506.02903},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HsuKS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/JoulaniGS15,
  author     = {Joulani, Pooria and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Fast Cross-Validation for Incremental Learning},
  journal    = {CoRR},
  volume     = {abs/1507.00066},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.00066},
  eprinttype = {arXiv},
  eprint     = {1507.00066},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/JoulaniGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/KvetonWAS15,
  author     = {Kveton, Branislav and
                Wen, Zheng and
                Ashkan, Azin and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Combinatorial Cascading Bandits},
  journal    = {CoRR},
  volume     = {abs/1507.04208},
  year       = {2015},
  url        = {http://arxiv.org/abs/1507.04208},
  eprinttype = {arXiv},
  eprint     = {1507.04208},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/KvetonWAS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Gaussian} is braced so the proper noun keeps its capital under
% sentence-casing bibliography styles.
@article{DBLP:journals/corr/WuGS15,
  author     = {Wu, Yifan and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Online Learning with {Gaussian} Payoffs and Side Observations},
  journal    = {CoRR},
  volume     = {abs/1510.08108},
  year       = {2015},
  url        = {http://arxiv.org/abs/1510.08108},
  eprinttype = {arXiv},
  eprint     = {1510.08108},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/WuGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/HuangXSS15,
  author     = {Huang, Ruitong and
                Xu, Bing and
                Schuurmans, Dale and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Learning with a Strong Adversary},
  journal    = {CoRR},
  volume     = {abs/1511.03034},
  year       = {2015},
  url        = {http://arxiv.org/abs/1511.03034},
  eprinttype = {arXiv},
  eprint     = {1511.03034},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HuangXSS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/mor/BartokFPRS14,
  author     = {Bart{\'{o}}k, G{\'{a}}bor and
                Foster, Dean P. and
                P{\'{a}}l, D{\'{a}}vid and
                Rakhlin, Alexander and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Partial Monitoring - Classification, Regret Bounds, and Algorithms},
  journal    = {Math. Oper. Res.},
  volume     = {39},
  number     = {4},
  pages      = {967--997},
  year       = {2014},
  url        = {https://doi.org/10.1287/moor.2014.0663},
  doi        = {10.1287/MOOR.2014.0663},
  timestamp  = {Sun, 28 May 2017 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/mor/BartokFPRS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Markov} is braced so the proper noun keeps its capital under
% sentence-casing bibliography styles.
@article{DBLP:journals/tac/NeuGSA14,
  author     = {Neu, Gergely and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba and
                Antos, Andr{\'{a}}s},
  title      = {Online {Markov} Decision Processes Under Bandit Feedback},
  journal    = {{IEEE} Trans. Autom. Control.},
  volume     = {59},
  number     = {3},
  pages      = {676--691},
  year       = {2014},
  url        = {https://doi.org/10.1109/TAC.2013.2292137},
  doi        = {10.1109/TAC.2013.2292137},
  timestamp  = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tac/NeuGSA14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/tcs/KivinenSZ14,
  author     = {Kivinen, Jyrki and
                Szepesv{\'{a}}ri, Csaba and
                Zeugmann, Thomas},
  title      = {Guest Editors' introduction},
  journal    = {Theor. Comput. Sci.},
  volume     = {519},
  pages      = {1--3},
  year       = {2014},
  url        = {https://doi.org/10.1016/j.tcs.2013.09.018},
  doi        = {10.1016/J.TCS.2013.09.018},
  timestamp  = {Wed, 17 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tcs/KivinenSZ14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/tsp/LeSZ14,
  author     = {Le, Thanh and
                Szepesv{\'{a}}ri, Csaba and
                Zheng, Rong},
  title      = {Sequential Learning for Multi-Channel Wireless Network Monitoring
                With Channel Switching Costs},
  journal    = {{IEEE} Trans. Signal Process.},
  volume     = {62},
  number     = {22},
  pages      = {5919--5929},
  year       = {2014},
  url        = {https://doi.org/10.1109/TSP.2014.2357779},
  doi        = {10.1109/TSP.2014.2357779},
  timestamp  = {Sat, 18 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/tsp/LeSZ14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Pseudo-MDPs} is braced as a whole word: sentence-casing styles would
% otherwise lowercase the acronym to "Pseudo-mdps".
@inproceedings{DBLP:conf/adprl/YaoSPZ14,
  author     = {Yao, Hengshuai and
                Szepesv{\'{a}}ri, Csaba and
                Pires, Bernardo {\'{A}}vila and
                Zhang, Xinhua},
  title      = {{Pseudo-MDPs} and factored linear action models},
  booktitle  = {2014 {IEEE} Symposium on Adaptive Dynamic Programming and Reinforcement
                Learning, {ADPRL} 2014, Orlando, FL, USA, December 9-12, 2014},
  pages      = {1--9},
  publisher  = {{IEEE}},
  year       = {2014},
  url        = {https://doi.org/10.1109/ADPRL.2014.7010633},
  doi        = {10.1109/ADPRL.2014.7010633},
  timestamp  = {Wed, 16 Oct 2019 14:14:48 +0200},
  biburl     = {https://dblp.org/rec/conf/adprl/YaoSPZ14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/HuangS14,
  author     = {Huang, Ruitong and
                Szepesv{\'{a}}ri, Csaba},
  title      = {A Finite-Sample Generalization Bound for Semiparametric Regression:
                Partially Linear Models},
  booktitle  = {Proceedings of the Seventeenth International Conference on Artificial
                Intelligence and Statistics, {AISTATS} 2014, Reykjavik, Iceland, April
                22-25, 2014},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {33},
  pages      = {402--410},
  publisher  = {JMLR.org},
  year       = {2014},
  url        = {http://proceedings.mlr.press/v33/huang14.html},
  timestamp  = {Wed, 29 May 2019 08:41:44 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/HuangS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/alt/LattimoreGS14,
  author     = {Lattimore, Tor and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Auer, Peter and
                Clark, Alexander and
                Zeugmann, Thomas and
                Zilles, Sandra},
  title      = {On Learning the Optimal Waiting Time},
  booktitle  = {Algorithmic Learning Theory - 25th International Conference, {ALT}
                2014, Bled, Slovenia, October 8-10, 2014. Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {8776},
  pages      = {200--214},
  publisher  = {Springer},
  year       = {2014},
  url        = {https://doi.org/10.1007/978-3-319-11662-4\_15},
  doi        = {10.1007/978-3-319-11662-4\_15},
  timestamp  = {Sun, 25 Oct 2020 22:46:29 +0100},
  biburl     = {https://dblp.org/rec/conf/alt/LattimoreGS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Markov} is braced so the proper noun keeps its capital under
% sentence-casing bibliography styles. The booktitle ordinal is "31st".
@inproceedings{DBLP:conf/icml/DickGS14,
  author     = {Dick, Travis and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Online Learning in {Markov} Decision Processes with Changing Cost Sequences},
  booktitle  = {Proceedings of the 31st International Conference on Machine Learning,
                {ICML} 2014, Beijing, China, 21-26 June 2014},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {32},
  pages      = {512--520},
  publisher  = {JMLR.org},
  year       = {2014},
  url        = {http://proceedings.mlr.press/v32/dick14.html},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/DickGS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Monte} {Carlo} is braced so the proper noun keeps its capitals under
% sentence-casing bibliography styles. The booktitle ordinal is "31st".
@inproceedings{DBLP:conf/icml/NeufeldGSS14,
  author     = {Neufeld, James and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Szepesv{\'{a}}ri, Csaba and
                Schuurmans, Dale},
  title      = {Adaptive {Monte} {Carlo} via Bandit Allocation},
  booktitle  = {Proceedings of the 31st International Conference on Machine Learning,
                {ICML} 2014, Beijing, China, 21-26 June 2014},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {32},
  pages      = {1944--1952},
  publisher  = {JMLR.org},
  year       = {2014},
  url        = {http://proceedings.mlr.press/v32/neufeld14.html},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/NeufeldGSS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/isaim/HuangS14,
  author     = {Huang, Ruitong and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Generalization Bounds for Partially Linear Models},
  booktitle  = {International Symposium on Artificial Intelligence and Mathematics,
                {ISAIM} 2014, Fort Lauderdale, FL, USA, January 6-8, 2014},
  year       = {2014},
  url        = {http://www.cs.uic.edu/pub/Isaim2014/WebPreferences/ISAIM2014\_ML\_Huang\_Szepesvari.pdf},
  timestamp  = {Wed, 20 Mar 2024 17:48:47 +0100},
  biburl     = {https://dblp.org/rec/conf/isaim/HuangS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/YaoSSMB14,
  author     = {Yao, Hengshuai and
                Szepesv{\'{a}}ri, Csaba and
                Sutton, Richard S. and
                Modayil, Joseph and
                Bhatnagar, Shalabh},
  editor     = {Ghahramani, Zoubin and
                Welling, Max and
                Cortes, Corinna and
                Lawrence, Neil D. and
                Weinberger, Kilian Q.},
  title      = {Universal Option Models},
  booktitle  = {Advances in Neural Information Processing Systems 27: Annual Conference
                on Neural Information Processing Systems 2014, December 8-13 2014,
                Montreal, Quebec, Canada},
  pages      = {990--998},
  year       = {2014},
  url        = {https://proceedings.neurips.cc/paper/2014/hash/996a7fa078cc36c46d02f9af3bef918b-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/YaoSSMB14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% The url query parameters are separated by plain LaTeX-escaped ampersands;
% no HTML character references belong in the query string.
@inproceedings{DBLP:conf/uai/LattimoreCS14,
  author     = {Lattimore, Tor and
                Crammer, Koby and
                Szepesv{\'{a}}ri, Csaba},
  editor     = {Zhang, Nevin L. and
                Tian, Jin},
  title      = {Optimal Resource Allocation with Semi-Bandit Feedback},
  booktitle  = {Proceedings of the Thirtieth Conference on Uncertainty in Artificial
                Intelligence, {UAI} 2014, Quebec City, Quebec, Canada, July 23-27,
                2014},
  pages      = {477--486},
  publisher  = {{AUAI} Press},
  year       = {2014},
  url        = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2483\&proceeding\_id=30},
  timestamp  = {Wed, 03 Feb 2021 11:09:27 +0100},
  biburl     = {https://dblp.org/rec/conf/uai/LattimoreCS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@proceedings{DBLP:conf/colt/2014,
  editor     = {Balcan, Maria{-}Florina and
                Feldman, Vitaly and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Proceedings of The 27th Conference on Learning Theory, {COLT} 2014,
                Barcelona, Spain, June 13-15, 2014},
  series     = {{JMLR} Workshop and Conference Proceedings},
  volume     = {35},
  publisher  = {JMLR.org},
  year       = {2014},
  url        = {http://proceedings.mlr.press/v35/},
  timestamp  = {Wed, 29 May 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/colt/2014.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% {Monte} {Carlo} is braced so the proper noun keeps its capitals under
% sentence-casing bibliography styles.
@article{DBLP:journals/corr/NeufeldGSS14,
  author     = {Neufeld, James and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Schuurmans, Dale and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Adaptive {Monte} {Carlo} via Bandit Allocation},
  journal    = {CoRR},
  volume     = {abs/1405.3318},
  year       = {2014},
  url        = {http://arxiv.org/abs/1405.3318},
  eprinttype = {arXiv},
  eprint     = {1405.3318},
  timestamp  = {Tue, 19 May 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/NeufeldGSS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/LattimoreCS14,
  author     = {Lattimore, Tor and
                Crammer, Koby and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Optimal Resource Allocation with Semi-Bandit Feedback},
  journal    = {CoRR},
  volume     = {abs/1406.3840},
  year       = {2014},
  url        = {http://arxiv.org/abs/1406.3840},
  eprinttype = {arXiv},
  eprint     = {1406.3840},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/LattimoreCS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/Abbasi-YadkoriS14,
  author     = {Abbasi{-}Yadkori, Yasin and
                Szepesv{\'{a}}ri, Csaba},
  title      = {Bayesian Optimal Control of Smoothly Parameterized Systems: The Lazy
                Posterior Sampling Algorithm},
  journal    = {CoRR},
  volume     = {abs/1406.3926},
  year       = {2014},
  url        = {http://arxiv.org/abs/1406.3926},
  eprinttype = {arXiv},
  eprint     = {1406.3926},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/Abbasi-YadkoriS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/LiMS14,
  author     = {Li, Lihong and
                Munos, R{\'{e}}mi and
                Szepesv{\'{a}}ri, Csaba},
  title      = {On Minimax Optimal Offline Policy Evaluation},
  journal    = {CoRR},
  volume     = {abs/1409.3653},
  year       = {2014},
  url        = {http://arxiv.org/abs/1409.3653},
  eprinttype = {arXiv},
  eprint     = {1409.3653},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/LiMS14.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/KvetonWAS14,
  author = {Branislav Kveton and Zheng Wen and Azin Ashkan and Csaba Szepesv{\'{a}}ri},
  title = {Tight Regret Bounds for Stochastic Combinatorial Semi-Bandits},
  year = {2014},
  journal = {CoRR},
  volume = {abs/1410.0949},
  eprinttype = {arXiv},
  eprint = {1410.0949},
  url = {http://arxiv.org/abs/1410.0949},
  timestamp = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/KvetonWAS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/ml/AfkanpourSB13,
  author = {Arash Afkanpour and Csaba Szepesv{\'{a}}ri and Michael Bowling},
  title = {Alignment based kernel learning with a continuous set of base kernels},
  year = {2013},
  journal = {Mach. Learn.},
  volume = {91},
  number = {3},
  pages = {305--324},
  doi = {10.1007/S10994-013-5361-8},
  url = {https://doi.org/10.1007/s10994-013-5361-8},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/ml/AfkanpourSB13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/tcs/AntosBPS13,
  author = {Andr{\'{a}}s Antos and G{\'{a}}bor Bart{\'{o}}k and D{\'{a}}vid P{\'{a}}l and Csaba Szepesv{\'{a}}ri},
  title = {Toward a classification of finite partial-monitoring games},
  year = {2013},
  journal = {Theor. Comput. Sci.},
  volume = {473},
  pages = {77--99},
  doi = {10.1016/J.TCS.2012.10.008},
  url = {https://doi.org/10.1016/j.tcs.2012.10.008},
  timestamp = {Wed, 17 Feb 2021 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/tcs/AntosBPS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/AfkanpourGSB13,
  author = {Arash Afkanpour and Andr{\'{a}}s Gy{\"{o}}rgy and Csaba Szepesv{\'{a}}ri and Michael Bowling},
  title = {A Randomized Mirror Descent Algorithm for Large Scale Multiple Kernel Learning},
  year = {2013},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning, {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
  series = {{JMLR} Workshop and Conference Proceedings},
  volume = {28},
  pages = {374--382},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v28/afkanpour13.html},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/AfkanpourGSB13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/YuCSS13,
  author = {Yaoliang Yu and Hao Cheng and Dale Schuurmans and Csaba Szepesv{\'{a}}ri},
  title = {Characterizing the Representer Theorem},
  year = {2013},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning, {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
  series = {{JMLR} Workshop and Conference Proceedings},
  volume = {28},
  pages = {570--578},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v28/yu13.html},
  timestamp = {Tue, 03 Nov 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/conf/icml/YuCSS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/PiresSG13,
  author = {Bernardo {\'{A}}vila Pires and Csaba Szepesv{\'{a}}ri and Mohammad Ghavamzadeh},
  title = {Cost-sensitive Multiclass Classification Risk Bounds},
  year = {2013},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning, {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
  series = {{JMLR} Workshop and Conference Proceedings},
  volume = {28},
  pages = {1391--1399},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v28/avilapires13.html},
  timestamp = {Wed, 29 May 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/PiresSG13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/JoulaniGS13,
  author = {Pooria Joulani and Andr{\'{a}}s Gy{\"{o}}rgy and Csaba Szepesv{\'{a}}ri},
  title = {Online Learning under Delayed Feedback},
  year = {2013},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning, {ICML} 2013, Atlanta, GA, USA, 16-21 June 2013},
  series = {{JMLR} Workshop and Conference Proceedings},
  volume = {28},
  pages = {1453--1461},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v28/joulani13.html},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/JoulaniGS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZolghadrBGGS13,
  author = {Navid Zolghadr and G{\'{a}}bor Bart{\'{o}}k and Russell Greiner and Andr{\'{a}}s Gy{\"{o}}rgy and Csaba Szepesv{\'{a}}ri},
  editor = {Christopher J. C. Burges and L{\'{e}}on Bottou and Zoubin Ghahramani and Kilian Q. Weinberger},
  title = {Online Learning with Costly Features and Labels},
  year = {2013},
  booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
  pages = {1241--1249},
  url = {https://proceedings.neurips.cc/paper/2013/hash/291597a100aadd814d197af4f4bab3a7-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/ZolghadrBGGS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2013 paper. "Markov" is brace-protected in the title so that
% sentence-casing bibliography styles keep its capital letter.
@inproceedings{DBLP:conf/nips/Abbasi-YadkoriBKSS13,
  author       = {Yasin Abbasi{-}Yadkori and
                  Peter L. Bartlett and
                  Varun Kanade and
                  Yevgeny Seldin and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Christopher J. C. Burges and
                  L{\'{e}}on Bottou and
                  Zoubin Ghahramani and
                  Kilian Q. Weinberger},
  title        = {Online Learning in {Markov} Decision Processes with Adversarially Chosen
                  Transition Probability Distributions},
  booktitle    = {Advances in Neural Information Processing Systems 26: 27th Annual
                  Conference on Neural Information Processing Systems 2013. Proceedings
                  of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
  pages        = {2508--2516},
  year         = {2013},
  url          = {https://proceedings.neurips.cc/paper/2013/hash/4f284803bd0966cc24fa8683a34afc6e-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/Abbasi-YadkoriBKSS13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. "Markov" is brace-protected in the title so that
% sentence-casing bibliography styles keep its capital letter.
@article{DBLP:journals/corr/abs-1303-3055,
  author       = {Yasin Abbasi{-}Yadkori and
                  Peter L. Bartlett and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Online Learning in {Markov} Decision Processes with Adversarially Chosen
                  Transition Probability Distributions},
  journal      = {CoRR},
  volume       = {abs/1303.3055},
  year         = {2013},
  url          = {http://arxiv.org/abs/1303.3055},
  eprinttype    = {arXiv},
  eprint       = {1303.3055},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1303-3055.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/JoulaniGS13,
  author = {Pooria Joulani and Andr{\'{a}}s Gy{\"{o}}rgy and Csaba Szepesv{\'{a}}ri},
  title = {Online Learning under Delayed Feedback},
  year = {2013},
  journal = {CoRR},
  volume = {abs/1306.0686},
  eprinttype = {arXiv},
  eprint = {1306.0686},
  url = {http://arxiv.org/abs/1306.0686},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/JoulaniGS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% CACM article. "Go" (the game) and "Monte Carlo" are brace-protected in the
% title so that sentence-casing bibliography styles keep their capitals.
@article{DBLP:journals/cacm/GellyKSSSST12,
  author       = {Sylvain Gelly and
                  Levente Kocsis and
                  Marc Schoenauer and
                  Mich{\`{e}}le Sebag and
                  David Silver and
                  Csaba Szepesv{\'{a}}ri and
                  Olivier Teytaud},
  title        = {The grand challenge of computer {Go}: {Monte Carlo} tree search and extensions},
  journal      = {Commun. {ACM}},
  volume       = {55},
  number       = {3},
  pages        = {106--113},
  year         = {2012},
  url          = {https://doi.org/10.1145/2093548.2093574},
  doi          = {10.1145/2093548.2093574},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/cacm/GellyKSSSST12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/YaoS12,
  author = {Hengshuai Yao and Csaba Szepesv{\'{a}}ri},
  editor = {J{\"{o}}rg Hoffmann and Bart Selman},
  title = {Approximate Policy Iteration with Linear Action Models},
  year = {2012},
  booktitle = {Proceedings of the Twenty-Sixth {AAAI} Conference on Artificial Intelligence, July 22-26, 2012, Toronto, Ontario, Canada},
  pages = {1212--1218},
  publisher = {{AAAI} Press},
  doi = {10.1609/AAAI.V26I1.8319},
  url = {https://doi.org/10.1609/aaai.v26i1.8319},
  timestamp = {Sat, 21 Oct 2023 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/aaai/YaoS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/alt/BartokS12,
  author = {G{\'{a}}bor Bart{\'{o}}k and Csaba Szepesv{\'{a}}ri},
  editor = {Nader H. Bshouty and Gilles Stoltz and Nicolas Vayatis and Thomas Zeugmann},
  title = {Partial Monitoring with Side Information},
  year = {2012},
  booktitle = {Algorithmic Learning Theory - 23rd International Conference, {ALT} 2012, Lyon, France, October 29-31, 2012. Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {7568},
  pages = {305--319},
  publisher = {Springer},
  doi = {10.1007/978-3-642-34106-9\_25},
  url = {https://doi.org/10.1007/978-3-642-34106-9\_25},
  timestamp = {Tue, 14 May 2019 10:00:51 +0200},
  biburl = {https://dblp.org/rec/conf/alt/BartokS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ewrl/DeisenrothSP12,
  author = {Marc Peter Deisenroth and Csaba Szepesv{\'{a}}ri and Jan Peters},
  editor = {Marc Peter Deisenroth and Csaba Szepesv{\'{a}}ri and Jan Peters},
  title = {Preface},
  year = {2012},
  booktitle = {Proceedings of the Tenth European Workshop on Reinforcement Learning, {EWRL} 2012, Edinburgh, Scotland, UK, June, 2012},
  series = {{JMLR} Proceedings},
  volume = {24},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v24/deisenroth12a/deisenroth12a.pdf},
  timestamp = {Wed, 29 May 2019 08:41:44 +0200},
  biburl = {https://dblp.org/rec/conf/ewrl/DeisenrothSP12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ewrl/SeldinSAA12,
  author = {Yevgeny Seldin and Csaba Szepesv{\'{a}}ri and Peter Auer and Yasin Abbasi{-}Yadkori},
  editor = {Marc Peter Deisenroth and Csaba Szepesv{\'{a}}ri and Jan Peters},
  title = {Evaluation and Analysis of the Performance of the {EXP3} Algorithm in Stochastic Environments},
  year = {2012},
  booktitle = {Proceedings of the Tenth European Workshop on Reinforcement Learning, {EWRL} 2012, Edinburgh, Scotland, UK, June, 2012},
  series = {{JMLR} Proceedings},
  volume = {24},
  pages = {103--116},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v24/seldin12a.html},
  timestamp = {Wed, 29 May 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/ewrl/SeldinSAA12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/BartokZS12,
  author = {G{\'{a}}bor Bart{\'{o}}k and Navid Zolghadr and Csaba Szepesv{\'{a}}ri},
  title = {An adaptive algorithm for finite stochastic partial monitoring},
  year = {2012},
  booktitle = {Proceedings of the 29th International Conference on Machine Learning, {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher = {icml.cc / Omnipress},
  url = {http://icml.cc/2012/papers/846.pdf},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/BartokZS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/PiresS12,
  author = {Bernardo {\'{A}}vila Pires and Csaba Szepesv{\'{a}}ri},
  title = {Statistical linear estimation with penalized estimators: an application to reinforcement learning},
  year = {2012},
  booktitle = {Proceedings of the 29th International Conference on Machine Learning, {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher = {icml.cc / Omnipress},
  url = {http://icml.cc/2012/papers/759.pdf},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/PiresS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/YuS12,
  author = {Yaoliang Yu and Csaba Szepesv{\'{a}}ri},
  title = {Analysis of Kernel Mean Matching under Covariate Shift},
  year = {2012},
  booktitle = {Proceedings of the 29th International Conference on Machine Learning, {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012},
  publisher = {icml.cc / Omnipress},
  url = {http://icml.cc/2012/papers/330.pdf},
  timestamp = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/icml/YuS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/KirosS12,
  author = {Ryan Kiros and Csaba Szepesv{\'{a}}ri},
  editor = {Peter L. Bartlett and Fernando C. N. Pereira and Christopher J. C. Burges and L{\'{e}}on Bottou and Kilian Q. Weinberger},
  title = {Deep Representations and Codes for Image Auto-Annotation},
  year = {2012},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages = {917--925},
  url = {https://proceedings.neurips.cc/paper/2012/hash/3c7781a36bcd6cf08c11a970fbe0e2a6-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/KirosS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:journals/jmlr/Abbasi-YadkoriPS12,
  author = {Yasin Abbasi{-}Yadkori and D{\'{a}}vid P{\'{a}}l and Csaba Szepesv{\'{a}}ri},
  editor = {Neil D. Lawrence and Mark A. Girolami},
  title = {Online-to-Confidence-Set Conversions and Application to Sparse Stochastic Bandits},
  year = {2012},
  booktitle = {Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, {AISTATS} 2012, La Palma, Canary Islands, Spain, April 21-23, 2012},
  series = {{JMLR} Proceedings},
  volume = {22},
  pages = {1--9},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v22/abbasi-yadkori12.html},
  timestamp = {Wed, 29 May 2019 08:41:44 +0200},
  biburl = {https://dblp.org/rec/journals/jmlr/Abbasi-YadkoriPS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:journals/jmlr/NeuGS12,
  author = {Gergely Neu and Andr{\'{a}}s Gy{\"{o}}rgy and Csaba Szepesv{\'{a}}ri},
  editor = {Neil D. Lawrence and Mark A. Girolami},
  title = {The adversarial stochastic shortest path problem with unknown transition probabilities},
  year = {2012},
  booktitle = {Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, {AISTATS} 2012, La Palma, Canary Islands, Spain, April 21-23, 2012},
  series = {{JMLR} Proceedings},
  volume = {22},
  pages = {805--813},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v22/neu12.html},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/jmlr/NeuGS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@proceedings{DBLP:conf/ewrl/2012,
  editor = {Marc Peter Deisenroth and Csaba Szepesv{\'{a}}ri and Jan Peters},
  title = {Proceedings of the Tenth European Workshop on Reinforcement Learning, {EWRL} 2012, Edinburgh, Scotland, UK, June, 2012},
  year = {2012},
  series = {{JMLR} Proceedings},
  volume = {24},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v24/},
  timestamp = {Wed, 29 May 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/conf/ewrl/2012.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint. "PAC-Bayesian" is brace-protected in the title so that
% sentence-casing bibliography styles keep the acronym and the capital B.
@article{DBLP:journals/corr/abs-1202-3717,
  author       = {Mahdi Milani Fard and
                  Joelle Pineau and
                  Csaba Szepesv{\'{a}}ri},
  title        = {{PAC-Bayesian} Policy Evaluation for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1202.3717},
  year         = {2012},
  url          = {http://arxiv.org/abs/1202.3717},
  eprinttype    = {arXiv},
  eprint       = {1202.3717},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1202-3717.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1205-0288,
  author = {Arash Afkanpour and Andr{\'{a}}s Gy{\"{o}}rgy and Csaba Szepesv{\'{a}}ri and Michael H. Bowling},
  title = {A Randomized Strategy for Learning to Combine Many Features},
  year = {2012},
  journal = {CoRR},
  volume = {abs/1205.0288},
  eprinttype = {arXiv},
  eprint = {1205.0288},
  url = {http://arxiv.org/abs/1205.0288},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1205-0288.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint. "Markov" is brace-protected in the title so that
% sentence-casing bibliography styles keep its capital letter.
@article{DBLP:journals/corr/abs-1206-3233,
  author       = {Alejandro Isaza and
                  Csaba Szepesv{\'{a}}ri and
                  Vadim Bulitko and
                  Russell Greiner},
  title        = {Speeding Up Planning in {Markov} Decision Processes via Automatically
                  Constructed Abstractions},
  journal      = {CoRR},
  volume       = {abs/1206.3233},
  year         = {2012},
  url          = {http://arxiv.org/abs/1206.3233},
  eprinttype    = {arXiv},
  eprint       = {1206.3233},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1206-3233.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. "Dyna" (the name of the planning architecture) is
% brace-protected in the title so that sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-1206-3285,
  author       = {Richard S. Sutton and
                  Csaba Szepesv{\'{a}}ri and
                  Alborz Geramifard and
                  Michael Bowling},
  title        = {{Dyna}-Style Planning with Linear Function Approximation and Prioritized
                  Sweeping},
  journal      = {CoRR},
  volume       = {abs/1206.3285},
  year         = {2012},
  url          = {http://arxiv.org/abs/1206.3285},
  eprinttype    = {arXiv},
  eprint       = {1206.3285},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1206-3285.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1206-4650,
  author = {Yaoliang Yu and Csaba Szepesv{\'{a}}ri},
  title = {Analysis of Kernel Mean Matching under Covariate Shift},
  year = {2012},
  journal = {CoRR},
  volume = {abs/1206.4650},
  eprinttype = {arXiv},
  eprint = {1206.4650},
  url = {http://arxiv.org/abs/1206.4650},
  timestamp = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1206-4650.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-1206-5264,
  author = {Gergely Neu and Csaba Szepesv{\'{a}}ri},
  title = {Apprenticeship Learning using Inverse Reinforcement Learning and Gradient Methods},
  year = {2012},
  journal = {CoRR},
  volume = {abs/1206.5264},
  eprinttype = {arXiv},
  eprint = {1206.5264},
  url = {http://arxiv.org/abs/1206.5264},
  timestamp = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-1206-5264.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% JMLR article. The leading symbol of the title is a calligraphic X (the arm
% space); it is stored as braced math instead of hard-coded emphasis so that
% styles neither recase nor restyle it.
@article{DBLP:journals/jmlr/BubeckMSS11,
  author       = {S{\'{e}}bastien Bubeck and
                  R{\'{e}}mi Munos and
                  Gilles Stoltz and
                  Csaba Szepesv{\'{a}}ri},
  title        = {{$\mathcal{X}$}-Armed Bandits},
  journal      = {J. Mach. Learn. Res.},
  volume       = {12},
  pages        = {1655--1695},
  year         = {2011},
  url          = {https://dl.acm.org/doi/10.5555/1953048.2021053},
  doi          = {10.5555/1953048.2021053},
  timestamp    = {Thu, 02 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/BubeckMSS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/FarahmandS11,
  author = {Amir Massoud Farahmand and Csaba Szepesv{\'{a}}ri},
  title = {Model selection in reinforcement learning},
  year = {2011},
  journal = {Mach. Learn.},
  volume = {85},
  number = {3},
  pages = {299--332},
  doi = {10.1007/S10994-011-5254-7},
  url = {https://doi.org/10.1007/s10994-011-5254-7},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/ml/FarahmandS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/alt/KivinenSUZ11,
  author = {Jyrki Kivinen and Csaba Szepesv{\'{a}}ri and Esko Ukkonen and Thomas Zeugmann},
  editor = {Jyrki Kivinen and Csaba Szepesv{\'{a}}ri and Esko Ukkonen and Thomas Zeugmann},
  title = {Editors' Introduction},
  year = {2011},
  booktitle = {Algorithmic Learning Theory - 22nd International Conference, {ALT} 2011, Espoo, Finland, October 5-7, 2011. Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {6925},
  pages = {1--13},
  publisher = {Springer},
  doi = {10.1007/978-3-642-24412-4\_1},
  url = {https://doi.org/10.1007/978-3-642-24412-4\_1},
  timestamp = {Tue, 14 May 2019 10:00:51 +0200},
  biburl = {https://dblp.org/rec/conf/alt/KivinenSUZ11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/ewrl/Szepesvari11,
  author = {Csaba Szepesv{\'{a}}ri},
  editor = {Scott Sanner and Marcus Hutter},
  title = {Invited Talk: Towards Robust Reinforcement Learning Algorithms},
  year = {2011},
  booktitle = {Recent Advances in Reinforcement Learning - 9th European Workshop, {EWRL} 2011, Athens, Greece, September 9-11, 2011, Revised Selected Papers},
  series = {Lecture Notes in Computer Science},
  volume = {7188},
  pages = {4},
  publisher = {Springer},
  doi = {10.1007/978-3-642-29946-9\_4},
  url = {https://doi.org/10.1007/978-3-642-29946-9\_4},
  timestamp = {Tue, 14 May 2019 10:00:48 +0200},
  biburl = {https://dblp.org/rec/conf/ewrl/Szepesvari11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/infocom/AroraSZ11,
  author = {Pallavi Arora and Csaba Szepesv{\'{a}}ri and Rong Zheng},
  title = {Sequential learning for optimal monitoring of multi-channel wireless networks},
  year = {2011},
  booktitle = {{INFOCOM} 2011. 30th {IEEE} International Conference on Computer Communications, Joint Conference of the {IEEE} Computer and Communications Societies, 10-15 April 2011, Shanghai, China},
  pages = {1152--1160},
  publisher = {{IEEE}},
  doi = {10.1109/INFCOM.2011.5934892},
  url = {https://doi.org/10.1109/INFCOM.2011.5934892},
  timestamp = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl = {https://dblp.org/rec/conf/infocom/AroraSZ11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/Abbasi-YadkoriPS11,
  author = {Yasin Abbasi{-}Yadkori and D{\'{a}}vid P{\'{a}}l and Csaba Szepesv{\'{a}}ri},
  editor = {John Shawe{-}Taylor and Richard S. Zemel and Peter L. Bartlett and Fernando C. N. Pereira and Kilian Q. Weinberger},
  title = {Improved Algorithms for Linear Stochastic Bandits},
  year = {2011},
  booktitle = {Advances in Neural Information Processing Systems 24: 25th Annual Conference on Neural Information Processing Systems 2011. Proceedings of a meeting held 12-14 December 2011, Granada, Spain},
  pages = {2312--2320},
  url = {https://proceedings.neurips.cc/paper/2011/hash/e1d5be1c7f2f456670de3d53c7b54f4a-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl = {https://dblp.org/rec/conf/nips/Abbasi-YadkoriPS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% UAI 2011 paper. The url previously carried the HTML entity for an ampersand
% between query parameters; it now uses plain (LaTeX-escaped) ampersands.
% "PAC-Bayesian" is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/uai/FardPS11,
  author       = {Mahdi Milani Fard and
                  Joelle Pineau and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {F{\'{a}}bio Gagliardi Cozman and
                  Avi Pfeffer},
  title        = {{PAC-Bayesian} Policy Evaluation for Reinforcement Learning},
  booktitle    = {{UAI} 2011, Proceedings of the Twenty-Seventh Conference on Uncertainty
                  in Artificial Intelligence, Barcelona, Spain, July 14-17, 2011},
  pages        = {195--202},
  publisher    = {{AUAI} Press},
  year         = {2011},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2218\&proceeding\_id=27},
  timestamp    = {Wed, 03 Feb 2021 11:09:03 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/FardPS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:journals/jmlr/Abbasi-YadkoriS11,
  author = {Yasin Abbasi{-}Yadkori and Csaba Szepesv{\'{a}}ri},
  editor = {Sham M. Kakade and Ulrike von Luxburg},
  title = {Regret Bounds for the Adaptive Control of Linear Quadratic Systems},
  year = {2011},
  booktitle = {{COLT} 2011 - The 24th Annual Conference on Learning Theory, June 9-11, 2011, Budapest, Hungary},
  series = {{JMLR} Proceedings},
  volume = {19},
  pages = {1--26},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v19/abbasi-yadkori11a/abbasi-yadkori11a.pdf},
  timestamp = {Wed, 29 May 2019 08:41:47 +0200},
  biburl = {https://dblp.org/rec/journals/jmlr/Abbasi-YadkoriS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:journals/jmlr/BartokPS11,
  author = {G{\'{a}}bor Bart{\'{o}}k and D{\'{a}}vid P{\'{a}}l and Csaba Szepesv{\'{a}}ri},
  editor = {Sham M. Kakade and Ulrike von Luxburg},
  title = {Minimax Regret of Finite Partial-Monitoring Games in Stochastic Environments},
  year = {2011},
  booktitle = {{COLT} 2011 - The 24th Annual Conference on Learning Theory, June 9-11, 2011, Budapest, Hungary},
  series = {{JMLR} Proceedings},
  volume = {19},
  pages = {133--154},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v19/bartok11a/bartok11a.pdf},
  timestamp = {Wed, 29 May 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/jmlr/BartokPS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:journals/jmlr/SzitaS11,
  author = {Istv{\'{a}}n Szita and Csaba Szepesv{\'{a}}ri},
  editor = {Sham M. Kakade and Ulrike von Luxburg},
  title = {Agnostic {KWIK} learning and efficient approximate reinforcement learning},
  year = {2011},
  booktitle = {{COLT} 2011 - The 24th Annual Conference on Learning Theory, June 9-11, 2011, Budapest, Hungary},
  series = {{JMLR} Proceedings},
  volume = {19},
  pages = {739--772},
  publisher = {JMLR.org},
  url = {http://proceedings.mlr.press/v19/szita11a/szita11a.pdf},
  timestamp = {Wed, 29 May 2019 01:00:00 +0200},
  biburl = {https://dblp.org/rec/journals/jmlr/SzitaS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@proceedings{DBLP:conf/alt/2011,
  editor       = {Jyrki Kivinen and
                  Csaba Szepesv{\'{a}}ri and
                  Esko Ukkonen and
                  Thomas Zeugmann},
  title        = {Algorithmic Learning Theory - 22nd International Conference, {ALT}
                  2011, Espoo, Finland, October 5-7, 2011. Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {6925},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-24412-4},
  doi          = {10.1007/978-3-642-24412-4},
  isbn         = {978-3-642-24411-7},
  timestamp    = {Tue, 14 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/alt/2011.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1102-2041,
  author       = {Andr{\'{a}}s Antos and
                  G{\'{a}}bor Bart{\'{o}}k and
                  D{\'{a}}vid P{\'{a}}l and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Toward a Classification of Finite Partial-Monitoring Games},
  journal      = {CoRR},
  volume       = {abs/1102.2041},
  year         = {2011},
  url          = {http://arxiv.org/abs/1102.2041},
  eprinttype    = {arXiv},
  eprint       = {1102.2041},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1102-2041.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1102-2670,
  author       = {Yasin Abbasi{-}Yadkori and
                  D{\'{a}}vid P{\'{a}}l and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Online Least Squares Estimation with Self-Normalized Processes: An
                  Application to Bandit Problems},
  journal      = {CoRR},
  volume       = {abs/1102.2670},
  year         = {2011},
  url          = {http://arxiv.org/abs/1102.2670},
  eprinttype    = {arXiv},
  eprint       = {1102.2670},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1102-2670.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1108-4961,
  author       = {Andr{\'{a}}s Antos and
                  G{\'{a}}bor Bart{\'{o}}k and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Non-trivial two-armed partial-monitoring games are bandits},
  journal      = {CoRR},
  volume       = {abs/1108.4961},
  year         = {2011},
  url          = {http://arxiv.org/abs/1108.4961},
  eprinttype    = {arXiv},
  eprint       = {1108.4961},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1108-4961.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1112-4607,
  author       = {Arash Afkanpour and
                  Csaba Szepesv{\'{a}}ri and
                  Michael H. Bowling},
  title        = {Alignment Based Kernel Learning with a Continuous Set of Base Kernels},
  journal      = {CoRR},
  volume       = {abs/1112.4607},
  year         = {2011},
  url          = {http://arxiv.org/abs/1112.4607},
  eprinttype    = {arXiv},
  eprint       = {1112.4607},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1112-4607.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@book{DBLP:series/synthesis/2010Szepesvari,
  author       = {Csaba Szepesv{\'{a}}ri},
  title        = {Algorithms for Reinforcement Learning},
  series       = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
  publisher    = {Morgan {\&} Claypool Publishers},
  year         = {2010},
  url          = {https://doi.org/10.2200/S00268ED1V01Y201005AIM009},
  doi          = {10.2200/S00268ED1V01Y201005AIM009},
  isbn         = {978-3-031-00423-0},
  timestamp    = {Tue, 17 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/series/synthesis/2010Szepesvari.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/iandc/BartokSZ10,
  author       = {G{\'{a}}bor Bart{\'{o}}k and
                  Csaba Szepesv{\'{a}}ri and
                  Sandra Zilles},
  title        = {Models of active learning in group-structured state spaces},
  journal      = {Inf. Comput.},
  volume       = {208},
  number       = {4},
  pages        = {364--384},
  year         = {2010},
  url          = {https://doi.org/10.1016/j.ic.2009.09.001},
  doi          = {10.1016/J.IC.2009.09.001},
  timestamp    = {Fri, 12 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/iandc/BartokSZ10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcs/AntosGS10,
  author       = {Andr{\'{a}}s Antos and
                  Varun Grover and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Active learning in heteroscedastic noise},
  journal      = {Theor. Comput. Sci.},
  volume       = {411},
  number       = {29-30},
  pages        = {2712--2728},
  year         = {2010},
  url          = {https://doi.org/10.1016/j.tcs.2010.04.007},
  doi          = {10.1016/J.TCS.2010.04.007},
  timestamp    = {Wed, 17 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tcs/AntosGS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/alt/BartokPS10,
  author       = {G{\'{a}}bor Bart{\'{o}}k and
                  D{\'{a}}vid P{\'{a}}l and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Marcus Hutter and
                  Frank Stephan and
                  Vladimir Vovk and
                  Thomas Zeugmann},
  title        = {Toward a Classification of Finite Partial-Monitoring Games},
  booktitle    = {Algorithmic Learning Theory, 21st International Conference, {ALT}
                  2010, Canberra, Australia, October 6-8, 2010. Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {6331},
  pages        = {224--238},
  publisher    = {Springer},
  year         = {2010},
  url          = {https://doi.org/10.1007/978-3-642-16108-7\_20},
  doi          = {10.1007/978-3-642-16108-7\_20},
  timestamp    = {Tue, 14 May 2019 10:00:51 +0200},
  biburl       = {https://dblp.org/rec/conf/alt/BartokPS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/colt/NeuGS10,
  author       = {Gergely Neu and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Adam Tauman Kalai and
                  Mehryar Mohri},
  title        = {The Online Loop-free Stochastic Shortest-Path Problem},
  booktitle    = {{COLT} 2010 - The 23rd Conference on Learning Theory, Haifa, Israel,
                  June 27-29, 2010},
  pages        = {231--243},
  publisher    = {Omnipress},
  year         = {2010},
  url          = {http://colt2010.haifa.il.ibm.com/papers/COLT2010proceedings.pdf\#page=239},
  timestamp    = {Tue, 25 Jan 2022 13:23:33 +0100},
  biburl       = {https://dblp.org/rec/conf/colt/NeuGS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/MaeiSBS10,
  author       = {Hamid Reza Maei and
                  Csaba Szepesv{\'{a}}ri and
                  Shalabh Bhatnagar and
                  Richard S. Sutton},
  editor       = {Johannes F{\"{u}}rnkranz and
                  Thorsten Joachims},
  title        = {Toward Off-Policy Learning Control with Function Approximation},
  booktitle    = {Proceedings of the 27th International Conference on Machine Learning
                  (ICML-10), June 21-24, 2010, Haifa, Israel},
  pages        = {719--726},
  publisher    = {Omnipress},
  year         = {2010},
  url          = {https://icml.cc/Conferences/2010/papers/627.pdf},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/MaeiSBS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/LiPSG10,
  author       = {Liuyang Li and
                  Barnab{\'{a}}s P{\'{o}}czos and
                  Csaba Szepesv{\'{a}}ri and
                  Russell Greiner},
  editor       = {Johannes F{\"{u}}rnkranz and
                  Thorsten Joachims},
  title        = {Budgeted Distribution Learning of Belief Net Parameters},
  booktitle    = {Proceedings of the 27th International Conference on Machine Learning
                  (ICML-10), June 21-24, 2010, Haifa, Israel},
  pages        = {879--886},
  publisher    = {Omnipress},
  year         = {2010},
  url          = {https://icml.cc/Conferences/2010/papers/406.pdf},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/LiPSG10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SzitaS10,
  author       = {Istv{\'{a}}n Szita and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Johannes F{\"{u}}rnkranz and
                  Thorsten Joachims},
  title        = {Model-based reinforcement learning with nearly tight exploration complexity
                  bounds},
  booktitle    = {Proceedings of the 27th International Conference on Machine Learning
                  (ICML-10), June 21-24, 2010, Haifa, Israel},
  pages        = {1031--1038},
  publisher    = {Omnipress},
  year         = {2010},
  url          = {https://icml.cc/Conferences/2010/papers/546.pdf},
  timestamp    = {Wed, 03 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SzitaS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iros/Abbasi-YadkoriMS10,
  author       = {Yasin Abbasi{-}Yadkori and
                  Joseph Modayil and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Extending rapidly-exploring random trees for asymptotically optimal
                  anytime motion planning},
  booktitle    = {2010 {IEEE/RSJ} International Conference on Intelligent Robots and
                  Systems, October 18-22, 2010, Taipei, Taiwan},
  pages        = {127--132},
  publisher    = {{IEEE}},
  year         = {2010},
  url          = {https://doi.org/10.1109/IROS.2010.5650614},
  doi          = {10.1109/IROS.2010.5650614},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/iros/Abbasi-YadkoriMS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/FarahmandMS10,
  author       = {Amir Massoud Farahmand and
                  R{\'{e}}mi Munos and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {John D. Lafferty and
                  Christopher K. I. Williams and
                  John Shawe{-}Taylor and
                  Richard S. Zemel and
                  Aron Culotta},
  title        = {Error Propagation for Approximate Policy and Value Iteration},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {568--576},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/65cc2c8205a05d7379fa3a6386f710e1-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/FarahmandMS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/FilippiCGS10,
  author       = {Sarah Filippi and
                  Olivier Capp{\'{e}} and
                  Aur{\'{e}}lien Garivier and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {John D. Lafferty and
                  Christopher K. I. Williams and
                  John Shawe{-}Taylor and
                  Richard S. Zemel and
                  Aron Culotta},
  title        = {Parametric Bandits: The Generalized Linear Case},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {586--594},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/c2626d850c80ea07e7511bbae4c76f4b-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/FilippiCGS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/NeuGSA10,
  author       = {Gergely Neu and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri and
                  Andr{\'{a}}s Antos},
  editor       = {John D. Lafferty and
                  Christopher K. I. Williams and
                  John Shawe{-}Taylor and
                  Richard S. Zemel and
                  Aron Culotta},
  title        = {Online Markov Decision Processes under Bandit Feedback},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {1804--1812},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/7bb060764a818184ebb1cc0d43d382aa-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/NeuGSA10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/PalPS10,
  author       = {D{\'{a}}vid P{\'{a}}l and
                  Barnab{\'{a}}s P{\'{o}}czos and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {John D. Lafferty and
                  Christopher K. I. Williams and
                  John Shawe{-}Taylor and
                  Richard S. Zemel and
                  Aron Culotta},
  title        = {Estimation of R{\'{e}}nyi Entropy and Mutual Information Based on Generalized
                  Nearest-Neighbor Graphs},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {1849--1857},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/577ef1154f3240ad5b9b413aa7346a1e-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/PalPS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:journals/jmlr/PoczosKS10,
  author       = {Barnab{\'{a}}s P{\'{o}}czos and
                  Sergey Kirshner and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Yee Whye Teh and
                  D. Mike Titterington},
  title        = {{REGO:} Rank-based Estimation of R{\'{e}}nyi Information using Euclidean
                  Graph Optimization},
  booktitle    = {Proceedings of the Thirteenth International Conference on Artificial
                  Intelligence and Statistics, {AISTATS} 2010, Chia Laguna Resort, Sardinia,
                  Italy, May 13-15, 2010},
  series       = {{JMLR} Proceedings},
  volume       = {9},
  pages        = {605--612},
  publisher    = {JMLR.org},
  year         = {2010},
  url          = {http://proceedings.mlr.press/v9/poczos10a.html},
  timestamp    = {Wed, 29 May 2019 08:41:44 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/PoczosKS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:journals/jmlr/TormaGS10,
  author       = {P{\'{e}}ter Torma and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Yee Whye Teh and
                  D. Mike Titterington},
  title        = {A Markov-Chain Monte Carlo Approach to Simultaneous Localization and
                  Mapping},
  booktitle    = {Proceedings of the Thirteenth International Conference on Artificial
                  Intelligence and Statistics, {AISTATS} 2010, Chia Laguna Resort, Sardinia,
                  Italy, May 13-15, 2010},
  series       = {{JMLR} Proceedings},
  volume       = {9},
  pages        = {852--859},
  publisher    = {JMLR.org},
  year         = {2010},
  url          = {http://proceedings.mlr.press/v9/torma10a.html},
  timestamp    = {Tue, 19 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/TormaGS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1001-4475,
  author       = {S{\'{e}}bastien Bubeck and
                  R{\'{e}}mi Munos and
                  Gilles Stoltz and
                  Csaba Szepesv{\'{a}}ri},
  title        = {X-Armed Bandits},
  journal      = {CoRR},
  volume       = {abs/1001.4475},
  year         = {2010},
  url          = {http://arxiv.org/abs/1001.4475},
  eprinttype    = {arXiv},
  eprint       = {1001.4475},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1001-4475.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1003-1954,
  author       = {D{\'{a}}vid P{\'{a}}l and
                  Barnab{\'{a}}s P{\'{o}}czos and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Estimation of R{\'{e}}nyi Entropy and Mutual Information Based
                  on Generalized Nearest-Neighbor Graphs},
  journal      = {CoRR},
  volume       = {abs/1003.1954},
  year         = {2010},
  url          = {http://arxiv.org/abs/1003.1954},
  eprinttype    = {arXiv},
  eprint       = {1003.1954},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1003-1954.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/NeuS09,
  author       = {Gergely Neu and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Training parsers by inverse reinforcement learning},
  journal      = {Mach. Learn.},
  volume       = {77},
  number       = {2-3},
  pages        = {303--337},
  year         = {2009},
  url          = {https://doi.org/10.1007/s10994-009-5110-1},
  doi          = {10.1007/S10994-009-5110-1},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/NeuS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tcs/AudibertMS09,
  author       = {Jean{-}Yves Audibert and
                  R{\'{e}}mi Munos and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Exploration-exploitation tradeoff using variance estimates in multi-armed
                  bandits},
  journal      = {Theor. Comput. Sci.},
  volume       = {410},
  number       = {19},
  pages        = {1876--1902},
  year         = {2009},
  url          = {https://doi.org/10.1016/j.tcs.2009.01.016},
  doi          = {10.1016/J.TCS.2009.01.016},
  timestamp    = {Wed, 17 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tcs/AudibertMS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/amcc/FarahmandGSM09,
  author       = {Amir Massoud Farahmand and
                  Mohammad Ghavamzadeh and
                  Csaba Szepesv{\'{a}}ri and
                  Shie Mannor},
  title        = {Regularized Fitted Q-Iteration for planning in continuous-space Markovian
                  decision problems},
  booktitle    = {American Control Conference, {ACC} 2009. St. Louis, Missouri, USA,
                  June 10-12, 2009},
  pages        = {725--730},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ACC.2009.5160611},
  doi          = {10.1109/ACC.2009.5160611},
  timestamp    = {Fri, 03 Dec 2021 13:02:58 +0100},
  biburl       = {https://dblp.org/rec/conf/amcc/FarahmandGSM09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/cdc/YaoBS09,
  author       = {Hengshuai Yao and
                  Shalabh Bhatnagar and
                  Csaba Szepesv{\'{a}}ri},
  title        = {{LMS-2:} Towards an algorithm that is as cheap as {LMS} and almost
                  as efficient as {RLS}},
  booktitle    = {Proceedings of the 48th {IEEE} Conference on Decision and Control,
                  {CDC} 2009, combined with the 28th Chinese Control Conference, December
                  16-18, 2009, Shanghai, China},
  pages        = {1181--1188},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/CDC.2009.5400370},
  doi          = {10.1109/CDC.2009.5400370},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cdc/YaoBS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/AudibertALMRS09,
  author       = {Jean{-}Yves Audibert and
                  Peter Auer and
                  Alessandro Lazaric and
                  R{\'{e}}mi Munos and
                  Daniil Ryabko and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Andrea Pohoreckyj Danyluk and
                  L{\'{e}}on Bottou and
                  Michael L. Littman},
  title        = {Workshop summary: On-line learning with limited feedback},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
                  Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {382},
  pages        = {8},
  publisher    = {{ACM}},
  year         = {2009},
  url          = {https://doi.org/10.1145/1553374.1553547},
  doi          = {10.1145/1553374.1553547},
  timestamp    = {Tue, 06 Nov 2018 16:58:29 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/AudibertALMRS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/FarhangfarGS09,
  author       = {Alireza Farhangfar and
                  Russell Greiner and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Andrea Pohoreckyj Danyluk and
                  L{\'{e}}on Bottou and
                  Michael L. Littman},
  title        = {Learning to segment from a few well-selected training images},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
                  Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {382},
  pages        = {305--312},
  publisher    = {{ACM}},
  year         = {2009},
  url          = {https://doi.org/10.1145/1553374.1553413},
  doi          = {10.1145/1553374.1553413},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/FarhangfarGS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/PoczosASGS09,
  author       = {Barnab{\'{a}}s P{\'{o}}czos and
                  Yasin Abbasi{-}Yadkori and
                  Csaba Szepesv{\'{a}}ri and
                  Russell Greiner and
                  Nathan R. Sturtevant},
  editor       = {Andrea Pohoreckyj Danyluk and
                  L{\'{e}}on Bottou and
                  Michael L. Littman},
  title        = {Learning when to stop thinking and do something!},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
                  Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {382},
  pages        = {825--832},
  publisher    = {{ACM}},
  year         = {2009},
  url          = {https://doi.org/10.1145/1553374.1553480},
  doi          = {10.1145/1553374.1553480},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/PoczosASGS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SuttonMPBSSW09,
  author       = {Richard S. Sutton and
                  Hamid Reza Maei and
                  Doina Precup and
                  Shalabh Bhatnagar and
                  David Silver and
                  Csaba Szepesv{\'{a}}ri and
                  Eric Wiewiora},
  editor       = {Andrea Pohoreckyj Danyluk and
                  L{\'{e}}on Bottou and
                  Michael L. Littman},
  title        = {Fast gradient-descent methods for temporal-difference learning with
                  linear function approximation},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine
                  Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {382},
  pages        = {993--1000},
  publisher    = {{ACM}},
  year         = {2009},
  url          = {https://doi.org/10.1145/1553374.1553501},
  doi          = {10.1145/1553374.1553501},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SuttonMPBSSW09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icra/FarahmandSJS09,
  author       = {Amir Massoud Farahmand and
                  Azad Shademan and
                  Martin J{\"{a}}gersand and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Model-based and model-free reinforcement learning for visual servoing},
  booktitle    = {2009 {IEEE} International Conference on Robotics and Automation, {ICRA}
                  2009, Kobe, Japan, May 12-17, 2009},
  pages        = {2917--2924},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ROBOT.2009.5152834},
  doi          = {10.1109/ROBOT.2009.5152834},
  timestamp    = {Mon, 22 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icra/FarahmandSJS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/MaeiSBPSS09,
  author       = {Hamid Reza Maei and
                  Csaba Szepesv{\'{a}}ri and
                  Shalabh Bhatnagar and
                  Doina Precup and
                  David Silver and
                  Richard S. Sutton},
  editor       = {Yoshua Bengio and
                  Dale Schuurmans and
                  John D. Lafferty and
                  Christopher K. I. Williams and
                  Aron Culotta},
  title        = {Convergent Temporal-Difference Learning with Arbitrary Smooth Function
                  Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 22: 23rd Annual
                  Conference on Neural Information Processing Systems 2009. Proceedings
                  of a meeting held 7-10 December 2009, Vancouver, British Columbia,
                  Canada},
  pages        = {1204--1212},
  publisher    = {Curran Associates, Inc.},
  year         = {2009},
  url          = {https://proceedings.neurips.cc/paper/2009/hash/3a15c7d0bbe60300a39f76f8a5ba6896-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/MaeiSBPSS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/YaoSBDS09,
  author       = {Hengshuai Yao and
                  Richard S. Sutton and
                  Shalabh Bhatnagar and
                  Dongcui Diao and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Yoshua Bengio and
                  Dale Schuurmans and
                  John D. Lafferty and
                  Christopher K. I. Williams and
                  Aron Culotta},
  title        = {Multi-Step Dyna Planning for Policy Evaluation and Control},
  booktitle    = {Advances in Neural Information Processing Systems 22: 23rd Annual
                  Conference on Neural Information Processing Systems 2009. Proceedings
                  of a meeting held 7-10 December 2009, Vancouver, British Columbia,
                  Canada},
  pages        = {2187--2195},
  publisher    = {Curran Associates, Inc.},
  year         = {2009},
  url          = {https://proceedings.neurips.cc/paper/2009/hash/c52f1bd66cc19d05628bd8bf27af3ad6-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/YaoSBDS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/YuLSS09,
  author       = {Yaoliang Yu and
                  Yuxi Li and
                  Dale Schuurmans and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Yoshua Bengio and
                  Dale Schuurmans and
                  John D. Lafferty and
                  Christopher K. I. Williams and
                  Aron Culotta},
  title        = {A General Projection Property for Distribution Families},
  booktitle    = {Advances in Neural Information Processing Systems 22: 23rd Annual
                  Conference on Neural Information Processing Systems 2009. Proceedings
                  of a meeting held 7-10 December 2009, Vancouver, British Columbia,
                  Canada},
  pages        = {2232--2240},
  publisher    = {Curran Associates, Inc.},
  year         = {2009},
  url          = {https://proceedings.neurips.cc/paper/2009/hash/a684eceee76fc522773286a895bc8436-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/YuLSS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:journals/jmlr/LiSS09,
  author       = {Yuxi Li and
                  Csaba Szepesv{\'{a}}ri and
                  Dale Schuurmans},
  editor       = {David A. Van Dyk and
                  Max Welling},
  title        = {Learning Exercise Policies for American Options},
  booktitle    = {Proceedings of the Twelfth International Conference on Artificial
                  Intelligence and Statistics, {AISTATS} 2009, Clearwater Beach, Florida,
                  USA, April 16-18, 2009},
  series       = {{JMLR} Proceedings},
  volume       = {5},
  pages        = {352--359},
  publisher    = {JMLR.org},
  year         = {2009},
  url          = {http://proceedings.mlr.press/v5/li09d.html},
  timestamp    = {Wed, 29 May 2019 08:41:44 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/LiSS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/MunosS08,
  author       = {R{\'{e}}mi Munos and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Finite-Time Bounds for Fitted Value Iteration},
  journal      = {J. Mach. Learn. Res.},
  volume       = {9},
  pages        = {815--857},
  year         = {2008},
  url          = {https://dl.acm.org/doi/10.5555/1390681.1390708},
  doi          = {10.5555/1390681.1390708},
  timestamp    = {Thu, 02 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/MunosS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Mach. Learn. 71(1), 2008, pp. 89--129.
% "Bellman" is a proper noun and is braced so styles cannot lowercase it.
@article{DBLP:journals/ml/AntosSM08,
  author       = {Andr{\'{a}}s Antos and
                  Csaba Szepesv{\'{a}}ri and
                  R{\'{e}}mi Munos},
  title        = {Learning near-optimal policies with {Bellman}-residual minimization
                  based fitted policy iteration and a single sample path},
  journal      = {Mach. Learn.},
  volume       = {71},
  number       = {1},
  pages        = {89--129},
  year         = {2008},
  url          = {https://doi.org/10.1007/s10994-007-5038-2},
  doi          = {10.1007/S10994-007-5038-2},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/AntosSM08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ALT 2008 conference paper (LNCS 5254), pp. 287--302.
@inproceedings{DBLP:conf/alt/AntosGS08,
  author    = {Antos, Andr{\'{a}}s and
               Grover, Varun and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Freund, Yoav and
               Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and
               Tur{\'{a}}n, Gy{\"{o}}rgy and
               Zeugmann, Thomas},
  title     = {Active Learning in Multi-armed Bandits},
  booktitle = {Algorithmic Learning Theory, 19th International Conference, {ALT} 2008, Budapest, Hungary, October 13-16, 2008. Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {5254},
  pages     = {287--302},
  publisher = {Springer},
  year      = {2008},
  url       = {https://doi.org/10.1007/978-3-540-87987-9\_25},
  doi       = {10.1007/978-3-540-87987-9\_25},
  timestamp = {Tue, 14 May 2019 10:00:51 +0200},
  biburl    = {https://dblp.org/rec/conf/alt/AntosGS08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ALT 2008 conference paper (LNCS 5254), pp. 329--343.
@inproceedings{DBLP:conf/alt/BartokSZ08,
  author    = {Bart{\'{o}}k, G{\'{a}}bor and
               Szepesv{\'{a}}ri, Csaba and
               Zilles, Sandra},
  editor    = {Freund, Yoav and
               Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and
               Tur{\'{a}}n, Gy{\"{o}}rgy and
               Zeugmann, Thomas},
  title     = {Active Learning of Group-Structured Environments},
  booktitle = {Algorithmic Learning Theory, 19th International Conference, {ALT} 2008, Budapest, Hungary, October 13-16, 2008. Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {5254},
  pages     = {329--343},
  publisher = {Springer},
  year      = {2008},
  url       = {https://doi.org/10.1007/978-3-540-87987-9\_28},
  doi       = {10.1007/978-3-540-87987-9\_28},
  timestamp = {Fri, 26 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/alt/BartokSZ08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% EWRL 2008 workshop paper (LNCS 5323), pp. 55--68.
% The symbol "Q" is braced so sentence-casing styles do not print "q-iteration".
@inproceedings{DBLP:conf/ewrl/FarahmandGSM08,
  author       = {Amir Massoud Farahmand and
                  Mohammad Ghavamzadeh and
                  Csaba Szepesv{\'{a}}ri and
                  Shie Mannor},
  editor       = {Sertan Girgin and
                  Manuel Loth and
                  R{\'{e}}mi Munos and
                  Philippe Preux and
                  Daniil Ryabko},
  title        = {Regularized Fitted {Q}-Iteration: Application to Planning},
  booktitle    = {Recent Advances in Reinforcement Learning, 8th European Workshop,
                  {EWRL} 2008, Villeneuve d'Ascq, France, June 30 - July 3, 2008, Revised
                  and Selected Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {5323},
  pages        = {55--68},
  publisher    = {Springer},
  year         = {2008},
  url          = {https://doi.org/10.1007/978-3-540-89722-4\_5},
  doi          = {10.1007/978-3-540-89722-4\_5},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ewrl/FarahmandGSM08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2008 conference paper, pp. 672--679.
% "Bernstein" is a proper noun and is braced so styles cannot lowercase it.
@inproceedings{DBLP:conf/icml/MnihSA08,
  author       = {Volodymyr Mnih and
                  Csaba Szepesv{\'{a}}ri and
                  Jean{-}Yves Audibert},
  editor       = {William W. Cohen and
                  Andrew McCallum and
                  Sam T. Roweis},
  title        = {Empirical {Bernstein} stopping},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Fifth International Conference
                  {(ICML} 2008), Helsinki, Finland, June 5-9, 2008},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {307},
  pages        = {672--679},
  publisher    = {{ACM}},
  year         = {2008},
  url          = {https://doi.org/10.1145/1390156.1390241},
  doi          = {10.1145/1390156.1390241},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/MnihSA08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2008 conference paper, pp. 201--208.
% The symbol "X" is braced so sentence-casing styles do not print "x-armed".
@inproceedings{DBLP:conf/nips/BubeckMSS08,
  author       = {S{\'{e}}bastien Bubeck and
                  R{\'{e}}mi Munos and
                  Gilles Stoltz and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Daphne Koller and
                  Dale Schuurmans and
                  Yoshua Bengio and
                  L{\'{e}}on Bottou},
  title        = {Online Optimization in {X}-Armed Bandits},
  booktitle    = {Advances in Neural Information Processing Systems 21, Proceedings
                  of the Twenty-Second Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages        = {201--208},
  publisher    = {Curran Associates, Inc.},
  year         = {2008},
  url          = {https://proceedings.neurips.cc/paper/2008/hash/f387624df552cea2f369918c5e1e12bc-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/BubeckMSS08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2008 conference paper, pp. 441--448.
@inproceedings{DBLP:conf/nips/FarahmandGSM08,
  author    = {Farahmand, Amir Massoud and
               Ghavamzadeh, Mohammad and
               Szepesv{\'{a}}ri, Csaba and
               Mannor, Shie},
  editor    = {Koller, Daphne and
               Schuurmans, Dale and
               Bengio, Yoshua and
               Bottou, L{\'{e}}on},
  title     = {Regularized Policy Iteration},
  booktitle = {Advances in Neural Information Processing Systems 21, Proceedings of the Twenty-Second Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages     = {441--448},
  publisher = {Curran Associates, Inc.},
  year      = {2008},
  url       = {https://proceedings.neurips.cc/paper/2008/hash/aeb3135b436aa55373822c010763dd54-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/FarahmandGSM08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2008 conference paper, pp. 1609--1616.
% The complexity bound is braced math so styles cannot downcase it to "o(n)",
% which would read as little-o.
@inproceedings{DBLP:conf/nips/SuttonSM08,
  author       = {Richard S. Sutton and
                  Csaba Szepesv{\'{a}}ri and
                  Hamid Reza Maei},
  editor       = {Daphne Koller and
                  Dale Schuurmans and
                  Yoshua Bengio and
                  L{\'{e}}on Bottou},
  title        = {A Convergent {$O(n)$} Temporal-difference Algorithm for Off-policy Learning
                  with Linear Function Approximation},
  booktitle    = {Advances in Neural Information Processing Systems 21, Proceedings
                  of the Twenty-Second Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 8-11, 2008},
  pages        = {1609--1616},
  publisher    = {Curran Associates, Inc.},
  year         = {2008},
  url          = {https://proceedings.neurips.cc/paper/2008/hash/e0c641195b27425bb056ac56f8953d24-Abstract.html},
  timestamp    = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/SuttonSM08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2008 conference paper, pp. 306--314.
% The url query separators are plain escaped ampersands; the export had left a
% double-encoded HTML entity (ampersand + "#38;") in their place.
% "Markov" is braced so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/uai/IsazaSBG08,
  author       = {Alejandro Isaza and
                  Csaba Szepesv{\'{a}}ri and
                  Vadim Bulitko and
                  Russell Greiner},
  editor       = {David A. McAllester and
                  Petri Myllym{\"{a}}ki},
  title        = {Speeding Up Planning in {Markov} Decision Processes via Automatically
                  Constructed Abstraction},
  booktitle    = {{UAI} 2008, Proceedings of the 24th Conference in Uncertainty in Artificial
                  Intelligence, Helsinki, Finland, July 9-12, 2008},
  pages        = {306--314},
  publisher    = {{AUAI} Press},
  year         = {2008},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1955\&proceeding\_id=24},
  timestamp    = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/IsazaSBG08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2008 conference paper, pp. 528--536.
% The url query separators are plain escaped ampersands; the export had left a
% double-encoded HTML entity (ampersand + "#38;") in their place.
% "Dyna" is a proper name and is braced so styles cannot lowercase it.
@inproceedings{DBLP:conf/uai/SuttonSGB08,
  author       = {Richard S. Sutton and
                  Csaba Szepesv{\'{a}}ri and
                  Alborz Geramifard and
                  Michael H. Bowling},
  editor       = {David A. McAllester and
                  Petri Myllym{\"{a}}ki},
  title        = {{Dyna}-Style Planning with Linear Function Approximation and Prioritized
                  Sweeping},
  booktitle    = {{UAI} 2008, Proceedings of the 24th Conference in Uncertainty in Artificial
                  Intelligence, Helsinki, Finland, July 9-12, 2008},
  pages        = {528--536},
  publisher    = {{AUAI} Press},
  year         = {2008},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1971\&proceeding\_id=24},
  timestamp    = {Wed, 03 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/SuttonSGB08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ALT 2007 conference paper (LNCS 4754), pp. 150--165.
@inproceedings{DBLP:conf/alt/AudibertMS07,
  author    = {Audibert, Jean{-}Yves and
               Munos, R{\'{e}}mi and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Hutter, Marcus and
               Servedio, Rocco A. and
               Takimoto, Eiji},
  title     = {Tuning Bandit Algorithms in Stochastic Environments},
  booktitle = {Algorithmic Learning Theory, 18th International Conference, {ALT} 2007, Sendai, Japan, October 1-4, 2007, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {4754},
  pages     = {150--165},
  publisher = {Springer},
  year      = {2007},
  url       = {https://doi.org/10.1007/978-3-540-75225-7\_15},
  doi       = {10.1007/978-3-540-75225-7\_15},
  timestamp = {Tue, 14 May 2019 10:00:51 +0200},
  biburl    = {https://dblp.org/rec/conf/alt/AudibertMS07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% COLT 2007 conference paper (LNCS 4539), pp. 454--468.
@inproceedings{DBLP:conf/colt/AuerOS07,
  author    = {Auer, Peter and
               Ortner, Ronald and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Bshouty, Nader H. and
               Gentile, Claudio},
  title     = {Improved Rates for the Stochastic Continuum-Armed Bandit Problem},
  booktitle = {Learning Theory, 20th Annual Conference on Learning Theory, {COLT} 2007, San Diego, CA, USA, June 13-15, 2007, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {4539},
  pages     = {454--468},
  publisher = {Springer},
  year      = {2007},
  url       = {https://doi.org/10.1007/978-3-540-72927-3\_33},
  doi       = {10.1007/978-3-540-72927-3\_33},
  timestamp = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/colt/AuerOS07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2007 conference paper, pp. 265--272.
@inproceedings{DBLP:conf/icml/FarahmandSA07,
  author    = {Farahmand, Amir Massoud and
               Szepesv{\'{a}}ri, Csaba and
               Audibert, Jean{-}Yves},
  editor    = {Ghahramani, Zoubin},
  title     = {Manifold-adaptive dimension estimation},
  booktitle = {Machine Learning, Proceedings of the Twenty-Fourth International Conference {(ICML} 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {227},
  pages     = {265--272},
  publisher = {{ACM}},
  year      = {2007},
  url       = {https://doi.org/10.1145/1273496.1273530},
  doi       = {10.1145/1273496.1273530},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/FarahmandSA07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2007 conference paper, pp. 830--835.
@inproceedings{DBLP:conf/ijcai/GyorgyKSS07,
  author    = {Gy{\"{o}}rgy, Andr{\'{a}}s and
               Kocsis, Levente and
               Szab{\'{o}}, Ivett and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Veloso, Manuela M.},
  title     = {Continuous Time Associative Bandit Problems},
  booktitle = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages     = {830--835},
  year      = {2007},
  url       = {http://ijcai.org/Proceedings/07/Papers/133.pdf},
  timestamp = {Tue, 19 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/ijcai/GyorgyKSS07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2007 conference paper, pp. 1576--1581.
% Title spelling corrected: "Similarity" (the export read "Similary").
@inproceedings{DBLP:conf/ijcai/BiroSS07,
  author       = {Istv{\'{a}}n B{\'{\i}}r{\'{o}} and
                  Zolt{\'{a}}n Szamonek and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Manuela M. Veloso},
  title        = {Sequence Prediction Exploiting Similarity Information},
  booktitle    = {{IJCAI} 2007, Proceedings of the 20th International Joint Conference
                  on Artificial Intelligence, Hyderabad, India, January 6-12, 2007},
  pages        = {1576--1581},
  year         = {2007},
  url          = {http://ijcai.org/Proceedings/07/Papers/254.pdf},
  timestamp    = {Tue, 20 Aug 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/BiroSS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NIPS 2007 conference paper, pp. 9--16.
% "Q" and the acronym "MDPs" are braced so styles cannot lowercase them.
@inproceedings{DBLP:conf/nips/AntosMS07,
  author       = {Andr{\'{a}}s Antos and
                  R{\'{e}}mi Munos and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {John C. Platt and
                  Daphne Koller and
                  Yoram Singer and
                  Sam T. Roweis},
  title        = {Fitted {Q}-iteration in continuous action-space {MDPs}},
  booktitle    = {Advances in Neural Information Processing Systems 20, Proceedings
                  of the Twenty-First Annual Conference on Neural Information Processing
                  Systems, Vancouver, British Columbia, Canada, December 3-6, 2007},
  pages        = {9--16},
  publisher    = {Curran Associates, Inc.},
  year         = {2007},
  url          = {https://proceedings.neurips.cc/paper/2007/hash/da0d1111d2dc5d489242e60ebcbaf988-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/AntosMS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2007 conference paper, pp. 295--302.
% The url query separators are plain escaped ampersands; the export had left a
% double-encoded HTML entity (ampersand + "#38;") in their place.
@inproceedings{DBLP:conf/uai/NeuS07,
  author       = {Gergely Neu and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Ronald Parr and
                  Linda C. van der Gaag},
  title        = {Apprenticeship Learning using Inverse Reinforcement Learning and Gradient
                  Methods},
  booktitle    = {{UAI} 2007, Proceedings of the Twenty-Third Conference on Uncertainty
                  in Artificial Intelligence, Vancouver, BC, Canada, July 19-22, 2007},
  pages        = {295--302},
  publisher    = {{AUAI} Press},
  year         = {2007},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=1702\&proceeding\_id=23},
  timestamp    = {Wed, 03 Feb 2021 11:09:18 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/NeuS07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: J. Multim. 1(1), 2006, pp. 32--43.
@article{DBLP:journals/jmm2/TormaS06,
  author    = {Torma, P{\'{e}}ter and
               Szepesv{\'{a}}ri, Csaba},
  title     = {Local Importance Sampling: {A} Novel Technique to Enhance Particle Filtering},
  journal   = {J. Multim.},
  volume    = {1},
  number    = {1},
  pages     = {32--43},
  year      = {2006},
  url       = {https://doi.org/10.4304/jmm.1.1.32-43},
  doi       = {10.4304/JMM.1.1.32-43},
  timestamp = {Fri, 18 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmm2/TormaS06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Mach. Learn. 63(3), 2006, pp. 249--286.
@article{DBLP:journals/ml/KocsisS06,
  author    = {Kocsis, Levente and
               Szepesv{\'{a}}ri, Csaba},
  title     = {Universal parameter optimisation in games based on {SPSA}},
  journal   = {Mach. Learn.},
  volume    = {63},
  number    = {3},
  pages     = {249--286},
  year      = {2006},
  url       = {https://doi.org/10.1007/s10994-006-6888-8},
  doi       = {10.1007/S10994-006-6888-8},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/KocsisS06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ACG 2005 conference paper (LNCS 4250, published 2006), pp. 39--56.
@inproceedings{DBLP:conf/acg/KocsisSW06,
  author    = {Kocsis, Levente and
               Szepesv{\'{a}}ri, Csaba and
               Winands, Mark H. M.},
  editor    = {van den Herik, H. Jaap and
               Hsu, Shun{-}chin and
               Hsu, Tsan{-}sheng and
               Donkers, H. H. L. M.},
  title     = {{RSPSA:} Enhanced Parameter Optimization in Games},
  booktitle = {Advances in Computer Games, 11th International Conference, {ACG} 2005, Taipei, Taiwan, September 6-9, 2005. Revised Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {4250},
  pages     = {39--56},
  publisher = {Springer},
  year      = {2006},
  url       = {https://doi.org/10.1007/11922155\_4},
  doi       = {10.1007/11922155\_4},
  timestamp = {Tue, 14 May 2019 10:00:53 +0200},
  biburl    = {https://dblp.org/rec/conf/acg/KocsisSW06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% COLT 2006 conference paper (LNCS 4005), pp. 574--588.
% "Bellman" is a proper noun and is braced so sentence-casing styles keep it.
@inproceedings{DBLP:conf/colt/AntosSM06,
  author       = {Andr{\'{a}}s Antos and
                  Csaba Szepesv{\'{a}}ri and
                  R{\'{e}}mi Munos},
  editor       = {G{\'{a}}bor Lugosi and
                  Hans Ulrich Simon},
  title        = {Learning Near-Optimal Policies with {Bellman}-Residual Minimization
                  Based Fitted Policy Iteration and a Single Sample Path},
  booktitle    = {Learning Theory, 19th Annual Conference on Learning Theory, {COLT}
                  2006, Pittsburgh, PA, USA, June 22-25, 2006, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {4005},
  pages        = {574--588},
  publisher    = {Springer},
  year         = {2006},
  url          = {https://doi.org/10.1007/11776420\_42},
  doi          = {10.1007/11776420\_42},
  timestamp    = {Fri, 08 Sep 2023 21:18:17 +0200},
  biburl       = {https://dblp.org/rec/conf/colt/AntosSM06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ECML 2006 conference paper (LNCS 4212), pp. 282--293.
% "Monte-Carlo" is braced so sentence-casing styles do not print "monte-carlo".
@inproceedings{DBLP:conf/ecml/KocsisS06,
  author       = {Levente Kocsis and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Johannes F{\"{u}}rnkranz and
                  Tobias Scheffer and
                  Myra Spiliopoulou},
  title        = {Bandit Based {Monte-Carlo} Planning},
  booktitle    = {Machine Learning: {ECML} 2006, 17th European Conference on Machine
                  Learning, Berlin, Germany, September 18-22, 2006, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {4212},
  pages        = {282--293},
  publisher    = {Springer},
  year         = {2006},
  url          = {https://doi.org/10.1007/11871842\_29},
  doi          = {10.1007/11871842\_29},
  timestamp    = {Tue, 14 May 2019 10:00:54 +0200},
  biburl       = {https://dblp.org/rec/conf/ecml/KocsisS06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICDM 2005 conference paper, pp. 434--441.
% The algorithm name "X-mHMM" and the acronym "HMMs" are braced so that
% sentence-casing styles preserve their mixed/upper case.
@inproceedings{DBLP:conf/icdm/SzamonekS05,
  author       = {Zolt{\'{a}}n Szamonek and
                  Csaba Szepesv{\'{a}}ri},
  title        = {{X-mHMM}: An Efficient Algorithm for Training Mixtures of {HMMs} When
                  the Number of Mixtures Is Unknown},
  booktitle    = {Proceedings of the 5th {IEEE} International Conference on Data Mining
                  {(ICDM} 2005), 27-30 November 2005, Houston, Texas, {USA}},
  pages        = {434--441},
  publisher    = {{IEEE} Computer Society},
  year         = {2005},
  url          = {https://doi.org/10.1109/ICDM.2005.156},
  doi          = {10.1109/ICDM.2005.156},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icdm/SzamonekS05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2005 conference paper, pp. 880--887.
% The editor surname "De Raedt" is written in "Last, First" form; in
% "First Last" order BibTeX would take only "Raedt" as the surname.
@inproceedings{DBLP:conf/icml/SzepesvariM05,
  author       = {Csaba Szepesv{\'{a}}ri and
                  R{\'{e}}mi Munos},
  editor       = {De Raedt, Luc and
                  Stefan Wrobel},
  title        = {Finite time bounds for sampling based fitted value iteration},
  booktitle    = {Machine Learning, Proceedings of the Twenty-Second International Conference
                  {(ICML} 2005), Bonn, Germany, August 7-11, 2005},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {119},
  pages        = {880--887},
  publisher    = {{ACM}},
  year         = {2005},
  url          = {https://doi.org/10.1145/1102351.1102462},
  doi          = {10.1145/1102351.1102462},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/SzepesvariM05.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2004 conference paper, pp. 550--555.
@inproceedings{DBLP:conf/aaai/Szepesvari04,
  author    = {Szepesv{\'{a}}ri, Csaba},
  editor    = {McGuinness, Deborah L. and
               Ferguson, George},
  title     = {Shortest Path Discovery Problems: {A} Framework, Algorithms and Experimental Results},
  booktitle = {Proceedings of the Nineteenth National Conference on Artificial Intelligence, Sixteenth Conference on Innovative Applications of Artificial Intelligence, July 25-29, 2004, San Jose, California, {USA}},
  pages     = {550--555},
  publisher = {{AAAI} Press / The {MIT} Press},
  year      = {2004},
  url       = {http://www.aaai.org/Library/AAAI/2004/aaai04-088.php},
  timestamp = {Tue, 05 Sep 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/Szepesvari04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ECAI 2004 conference paper, pp. 1091--1092.
% Booktitle spelling corrected ("European", "Applications") and the ECAI
% acronym braced. The compound editor surname "Lopez de Mantaras" is braced as
% one unit so that "de" is not parsed as a von-part splitting the surname.
@inproceedings{DBLP:conf/ecai/SzepesvariKK04,
  author       = {Csaba Szepesv{\'{a}}ri and
                  Andr{\'{a}}s Kocsor and
                  Korn{\'{e}}l Kov{\'{a}}cs},
  editor       = {{L{\'{o}}pez de M{\'{a}}ntaras}, Ram{\'{o}}n and
                  Lorenza Saitta},
  title        = {Kernel Machine Based Feature Extraction Algorithms for Regression
                  Problems},
  booktitle    = {Proceedings of the 16th European Conference on Artificial Intelligence,
                  {ECAI}'2004, including Prestigious Applications of Intelligent Systems,
                  {PAIS} 2004, Valencia, Spain, August 22-27, 2004},
  pages        = {1091--1092},
  publisher    = {{IOS} Press},
  year         = {2004},
  timestamp    = {Fri, 11 May 2018 12:42:30 +0200},
  biburl       = {https://dblp.org/rec/conf/ecai/SzepesvariKK04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ECCV 2004 conference paper (LNCS 3021), pp. 16--27.
@inproceedings{DBLP:conf/eccv/TormaS04,
  author    = {Torma, P{\'{e}}ter and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Pajdla, Tom{\'{a}}s and
               Matas, Jiri},
  title     = {Enhancing Particle Filters Using Local Likelihood Sampling},
  booktitle = {Computer Vision - {ECCV} 2004, 8th European Conference on Computer Vision, Prague, Czech Republic, May 11-14, 2004. Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {3021},
  pages     = {16--27},
  publisher = {Springer},
  year      = {2004},
  url       = {https://doi.org/10.1007/978-3-540-24670-1\_2},
  doi       = {10.1007/978-3-540-24670-1\_2},
  timestamp = {Tue, 14 May 2019 10:00:45 +0200},
  biburl    = {https://dblp.org/rec/conf/eccv/TormaS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ECML 2004 conference paper (LNCS 3201), pp. 227--238.
@inproceedings{DBLP:conf/ecml/KocsorKS04,
  author    = {Kocsor, Andr{\'{a}}s and
               Kov{\'{a}}cs, Korn{\'{e}}l and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Boulicaut, Jean{-}Fran{\c{c}}ois and
               Esposito, Floriana and
               Giannotti, Fosca and
               Pedreschi, Dino},
  title     = {Margin Maximizing Discriminant Analysis},
  booktitle = {Machine Learning: {ECML} 2004, 15th European Conference on Machine Learning, Pisa, Italy, September 20-24, 2004, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {3201},
  pages     = {227--238},
  publisher = {Springer},
  year      = {2004},
  url       = {https://doi.org/10.1007/978-3-540-30115-8\_23},
  doi       = {10.1007/978-3-540-30115-8\_23},
  timestamp = {Tue, 14 May 2019 10:00:54 +0200},
  biburl    = {https://dblp.org/rec/conf/ecml/KocsorKS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2004 conference paper (no page range is recorded in this entry).
% The symbol "Q" is braced so sentence-casing styles do not print "q-learning".
@inproceedings{DBLP:conf/icml/SzepesvariS04,
  author       = {Csaba Szepesv{\'{a}}ri and
                  William D. Smart},
  editor       = {Carla E. Brodley},
  title        = {Interpolation-based {Q}-learning},
  booktitle    = {Machine Learning, Proceedings of the Twenty-first International Conference
                  {(ICML} 2004), Banff, Alberta, Canada, July 4-8, 2004},
  series       = {{ACM} International Conference Proceeding Series},
  volume       = {69},
  publisher    = {{ACM}},
  year         = {2004},
  url          = {https://doi.org/10.1145/1015330.1015445},
  doi          = {10.1145/1015330.1015445},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/SzepesvariS04.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2003 workshop paper (no page range is recorded in this entry).
@inproceedings{DBLP:conf/aistats/TormaS03,
  author    = {Torma, P{\'{e}}ter and
               Szepesv{\'{a}}ri, Csaba},
  editor    = {Bishop, Christopher M. and
               Frey, Brendan J.},
  title     = {Sequential Importance Sampling for Visual Tracking Reconsidered},
  booktitle = {Proceedings of the Ninth International Workshop on Artificial Intelligence and Statistics, {AISTATS} 2003, Key West, Florida, USA, January 3-6, 2003},
  publisher = {Society for Artificial Intelligence and Statistics},
  year      = {2003},
  url       = {http://research.microsoft.com/en-us/um/cambridge/events/aistats2003/proceedings/198.pdf},
  timestamp = {Wed, 06 May 2015 20:33:21 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/TormaS03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Autom. 38(4), 2002, pp. 683--693.
@article{DBLP:journals/automatica/FrenchSR02,
  author    = {French, Mark and
               Szepesv{\'{a}}ri, Csaba and
               Rogers, Eric},
  title     = {{LQ} performance bounds for adaptive output feedback controllers for functionally uncertain nonlinear systems},
  journal   = {Autom.},
  volume    = {38},
  number    = {4},
  pages     = {683--693},
  year      = {2002},
  url       = {https://doi.org/10.1016/S0005-1098(01)00218-7},
  doi       = {10.1016/S0005-1098(01)00218-7},
  timestamp = {Thu, 20 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/automatica/FrenchSR02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Math. Control. Signals Syst. 15(2), 2002, pp. 145--176.
@article{DBLP:journals/mcss/FrenchSR02,
  author    = {French, Mark and
               Szepesv{\'{a}}ri, Csaba and
               Rogers, Eric},
  title     = {An Asymptotic Scaling Analysis of {LQ} Performance for an Approximate Adaptive Control Design},
  journal   = {Math. Control. Signals Syst.},
  volume    = {15},
  number    = {2},
  pages     = {145--176},
  year      = {2002},
  url       = {https://doi.org/10.1007/s004980200006},
  doi       = {10.1007/S004980200006},
  timestamp = {Thu, 10 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/mcss/FrenchSR02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: AI Commun. 14(3), 2001, pp. 163--176.
% "Markovian" derives from a proper name and is braced so styles keep its capital.
@article{DBLP:journals/aicom/Szepesvari01,
  author       = {Csaba Szepesv{\'{a}}ri},
  title        = {Efficient approximate planning in continuous space {Markovian} Decision
                  Problems},
  journal      = {{AI} Commun.},
  volume       = {14},
  number       = {3},
  pages        = {163--176},
  year         = {2001},
  url          = {http://content.iospress.com/articles/ai-communications/aic244},
  timestamp    = {Fri, 15 May 2015 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/aicom/Szepesvari01.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Int. J. Neural Syst. 11(2), 2001, pp. 125--143.
@article{DBLP:journals/ijns/LorinczHS01,
  author    = {L{\"{o}}rincz, Andr{\'{a}}s and
               H{\'{e}}v{\'{\i}}zi, Gy{\"{o}}rgy and
               Szepesv{\'{a}}ri, Csaba},
  title     = {Ockham's Razor Modeling of the Matrisome Channels of the Basal Ganglia Thalamocortical Loops},
  journal   = {Int. J. Neural Syst.},
  volume    = {11},
  number    = {2},
  pages     = {125--143},
  year      = {2001},
  url       = {https://doi.org/10.1142/S0129065701000412},
  doi       = {10.1142/S0129065701000412},
  timestamp = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ijns/LorinczHS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Acta Cybern. 14(3), 2000, pp. 507--522.
@article{DBLP:journals/actaC/KalmarSL00,
  author    = {Kalm{\'{a}}r, Zsolt and
               Szepesv{\'{a}}ri, Csaba and
               L{\"{o}}rincz, Andr{\'{a}}s},
  title     = {Modular Reinforcement Learning: {A} Case Study in a Robot Domain},
  journal   = {Acta Cybern.},
  volume    = {14},
  number    = {3},
  pages     = {507--522},
  year      = {2000},
  url       = {https://cyber.bibl.u-szeged.hu/index.php/actcybern/article/view/3546},
  timestamp = {Wed, 16 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/actaC/KalmarSL00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article: Mach. Learn. 38(3), 2000, pp. 287--308.
@article{DBLP:journals/ml/SinghJLS00,
  author    = {Singh, Satinder and
               Jaakkola, Tommi S. and
               Littman, Michael L. and
               Szepesv{\'{a}}ri, Csaba},
  title     = {Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms},
  journal   = {Mach. Learn.},
  volume    = {38},
  number    = {3},
  pages     = {287--308},
  year      = {2000},
  url       = {https://doi.org/10.1023/A:1007678930559},
  doi       = {10.1023/A:1007678930559},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/ml/SinghJLS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/tac/FrenchSR00,
  author       = {French, Mark and
                  Szepesv{\'{a}}ri, Csaba and
                  Rogers, Eric},
  title        = {Uncertainty, performance, and model dependency in approximate adaptive
                  nonlinear control},
  journal      = {{IEEE} Trans. Autom. Control.},
  volume       = {45},
  number       = {2},
  pages        = {353--358},
  year         = {2000},
  url          = {https://doi.org/10.1109/9.839965},
  doi          = {10.1109/9.839965},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/FrenchSR00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tsd/BaloghDGSS00,
  author       = {Balogh, Gy{\"{o}}rgy and
                  Dobler, Ervin and
                  Gr{\"{o}}bler, Tam{\'{a}}s and
                  Smodics, B{\'{e}}la and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Sojka, Petr and
                  Kopecek, Ivan and
                  Pala, Karel},
  title        = {{FlexVoice}: {A} Parametric Approach to High-Quality Speech Synthesis},
  booktitle    = {Text, Speech and Dialogue - Third International Workshop, {TSD} 2000,
                  Brno, Czech Republic, September 13-16, 2000, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {1902},
  pages        = {189--194},
  publisher    = {Springer},
  year         = {2000},
  url          = {https://doi.org/10.1007/3-540-45323-7\_32},
  doi          = {10.1007/3-540-45323-7\_32},
  timestamp    = {Tue, 14 May 2019 10:00:45 +0200},
  biburl       = {https://dblp.org/rec/conf/tsd/BaloghDGSS00.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nar/MurvaiVBSAP99,
  author       = {Murvai, J{\'{a}}nos and
                  Vlahovicek, Kristian and
                  Barta, Endre and
                  Szepesv{\'{a}}ri, Csaba and
                  Acatrinei, Cristina and
                  Pongor, S{\'{a}}ndor},
  title        = {The {SBASE} protein domain library, release 6.0: a collection of annotated
                  protein sequence segments},
  journal      = {Nucleic Acids Res.},
  volume       = {27},
  number       = {1},
  pages        = {257--259},
  year         = {1999},
  url          = {https://doi.org/10.1093/nar/27.1.257},
  doi          = {10.1093/NAR/27.1.257},
  timestamp    = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nar/MurvaiVBSAP99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/neco/SzepesvariL99,
  author       = {Szepesv{\'{a}}ri, Csaba and
                  Littman, Michael L.},
  title        = {A Unified Analysis of Value-Function-Based Reinforcement Learning
                  Algorithms},
  journal      = {Neural Comput.},
  volume       = {11},
  number       = {8},
  pages        = {2017--2060},
  year         = {1999},
  url          = {https://doi.org/10.1162/089976699300016070},
  doi          = {10.1162/089976699300016070},
  timestamp    = {Tue, 01 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/neco/SzepesvariL99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nn/KalmarMSL99,
  author       = {Kalm{\'{a}}r, Zsolt and
                  Marczell, Zsolt and
                  Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Parallel and robust skeletonization built on self-organizing elements},
  journal      = {Neural Networks},
  volume       = {12},
  number       = {1},
  pages        = {163--173},
  year         = {1999},
  url          = {https://doi.org/10.1016/S0893-6080(98)00119-1},
  doi          = {10.1016/S0893-6080(98)00119-1},
  timestamp    = {Wed, 14 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/nn/KalmarMSL99.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/actaC/Szepesvari98,
  author       = {Szepesv{\'{a}}ri, Csaba},
  title        = {{Non-Markovian} Policies in Sequential Decision Problems},
  journal      = {Acta Cybern.},
  volume       = {13},
  number       = {3},
  pages        = {305--318},
  year         = {1998},
  url          = {https://cyber.bibl.u-szeged.hu/index.php/actcybern/article/view/3493},
  timestamp    = {Wed, 16 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/actaC/Szepesvari98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/arobots/KalmarSL98,
  author       = {Kalm{\'{a}}r, Zsolt and
                  Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Module-Based Reinforcement Learning: Experiments with a Real Robot},
  journal      = {Auton. Robots},
  volume       = {5},
  number       = {3-4},
  pages        = {273--295},
  year         = {1998},
  url          = {https://doi.org/10.1023/A:1008858222277},
  doi          = {10.1023/A:1008858222277},
  timestamp    = {Thu, 18 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/arobots/KalmarSL98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jfr/SzepesvariL98,
  author       = {Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {An integrated architecture for motion-control and path-planning},
  journal      = {J. Field Robotics},
  volume       = {15},
  number       = {1},
  pages        = {1--15},
  year         = {1998},
  url          = {https://doi.org/10.1002/(SICI)1097-4563(199812)15:1<1::AID-ROB1>3.0.CO;2-V},
  doi          = {10.1002/(SICI)1097-4563(199812)15:1<1::AID-ROB1>3.0.CO;2-V},
  timestamp    = {Sun, 28 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jfr/SzepesvariL98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ml/KalmarSL98,
  author       = {Kalm{\'{a}}r, Zsolt and
                  Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Module-Based Reinforcement Learning: Experiments with a Real Robot},
  journal      = {Mach. Learn.},
  volume       = {31},
  number       = {1-3},
  pages        = {55--85},
  year         = {1998},
  url          = {https://doi.org/10.1023/A:1007440607681},
  doi          = {10.1023/A:1007440607681},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/KalmarSL98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/GaborKS98,
  author       = {G{\'{a}}bor, Zolt{\'{a}}n and
                  Kalm{\'{a}}r, Zsolt and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Shavlik, Jude W.},
  title        = {Multi-criteria Reinforcement Learning},
  booktitle    = {Proceedings of the Fifteenth International Conference on Machine Learning
                  ({ICML} 1998), Madison, Wisconsin, USA, July 24-27, 1998},
  pages        = {197--205},
  publisher    = {Morgan Kaufmann},
  year         = {1998},
  timestamp    = {Thu, 30 Jun 2011 10:34:12 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/GaborKS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mammo/SorantinSMWS98,
  author       = {Sorantin, Erich and
                  Schmidt, Ferdinand and
                  Mayer, Heinz and
                  Winkler, Peter and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Karssemeijer, Nico and
                  Thijssen, Martin and
                  Hendriks, Jan H. C. L. and
                  van Erning, Leon},
  title        = {Automated Detection and Classification of Micro-Calcifications in
                  Mammograms Using Artifical Neural Nets},
  booktitle    = {Digital Mammography, Fourth International Workshop on Digital Mammography,
                  {IWDM} 1998, Nijmegen, The Netherlands, June 1998},
  series       = {Computational Imaging and Vision},
  volume       = {13},
  pages        = {225--232},
  publisher    = {Springer},
  year         = {1998},
  url          = {https://doi.org/10.1007/978-94-011-5318-8\_37},
  doi          = {10.1007/978-94-011-5318-8\_37},
  timestamp    = {Fri, 19 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/mammo/SorantinSMWS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mammo/WinklerSTSMS98,
  author       = {Winkler, Peter and
                  Sorantin, Erich and
                  Tan{\'{a}}cs, Attila and
                  Schmidt, Ferdinand and
                  Mayer, Heinz and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Karssemeijer, Nico and
                  Thijssen, Martin and
                  Hendriks, Jan H. C. L. and
                  van Erning, Leon},
  title        = {Performance-Evaluation for Automated Detection of Microcalcifications
                  in Mammograms Using Three Different Film-Digitizers},
  booktitle    = {Digital Mammography, Fourth International Workshop on Digital Mammography,
                  {IWDM} 1998, Nijmegen, The Netherlands, June 1998},
  series       = {Computational Imaging and Vision},
  volume       = {13},
  pages        = {485--486},
  publisher    = {Springer},
  year         = {1998},
  url          = {https://doi.org/10.1007/978-94-011-5318-8\_83},
  doi          = {10.1007/978-94-011-5318-8\_83},
  timestamp    = {Fri, 19 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/mammo/WinklerSTSMS98.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/nn/SzepesvariCL97,
  author       = {Szepesv{\'{a}}ri, Csaba and
                  Cimmer, Szabolcs and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Neurocontroller using dynamic state feedback for compensatory control},
  journal      = {Neural Networks},
  volume       = {10},
  number       = {9},
  pages        = {1691--1708},
  year         = {1997},
  url          = {https://doi.org/10.1016/S0893-6080(97)00043-9},
  doi          = {10.1016/S0893-6080(97)00043-9},
  timestamp    = {Thu, 08 Jun 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nn/SzepesvariCL97.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ecml/Szepesvari97,
  author       = {Szepesv{\'{a}}ri, Csaba},
  editor       = {van Someren, Maarten and
                  Widmer, Gerhard},
  title        = {Learning and Exploitation Do Not Conflict Under Minimax Optimality},
  booktitle    = {Machine Learning: {ECML-97}, 9th European Conference on Machine Learning,
                  Prague, Czech Republic, April 23-25, 1997, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {1224},
  pages        = {242--249},
  publisher    = {Springer},
  year         = {1997},
  url          = {https://doi.org/10.1007/3-540-62858-4\_89},
  doi          = {10.1007/3-540-62858-4\_89},
  timestamp    = {Tue, 14 May 2019 10:00:54 +0200},
  biburl       = {https://dblp.org/rec/conf/ecml/Szepesvari97.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/ewlr/KalmarSL97,
  author       = {Kalm{\'{a}}r, Zsolt and
                  Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  editor       = {Birk, Andreas and
                  Demiris, John},
  title        = {Module Based Reinforcement Learning: An Application to a Real Robot},
  booktitle    = {Learning Robots, 6th European Workshop, {EWLR-6}, Brighton, England,
                  UK, August 1-2, 1997, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {1545},
  pages        = {29--45},
  publisher    = {Springer},
  year         = {1997},
  url          = {https://doi.org/10.1007/3-540-49240-2\_3},
  doi          = {10.1007/3-540-49240-2\_3},
  timestamp    = {Tue, 14 May 2019 10:00:49 +0200},
  biburl       = {https://dblp.org/rec/conf/ewlr/KalmarSL97.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/Szepesvari97,
  author       = {Szepesv{\'{a}}ri, Csaba},
  editor       = {Jordan, Michael I. and
                  Kearns, Michael J. and
                  Solla, Sara A.},
  title        = {The Asymptotic Convergence-Rate of {Q-learning}},
  booktitle    = {Advances in Neural Information Processing Systems 10, [{NIPS} Conference,
                  Denver, Colorado, USA, 1997]},
  pages        = {1064--1070},
  publisher    = {The {MIT} Press},
  year         = {1997},
  url          = {http://papers.nips.cc/paper/1383-the-asymptotic-convergence-rate-of-q-learning},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/Szepesvari97.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijns/FominRSL96,
  author       = {Fomin, Tibor and
                  Rozgonyi, Tam{\'{a}}s and
                  Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Self-Organizing Multi-Resolution Grid for Motion Planning and Control},
  journal      = {Int. J. Neural Syst.},
  volume       = {7},
  number       = {6},
  pages        = {757},
  year         = {1996},
  url          = {http://ejournals.wspc.com.sg/ijns/07/0706/fomin.html},
  timestamp    = {Fri, 16 Jul 2004 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijns/FominRSL96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ijon/SzepesvariL96,
  author       = {Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Approximate geometry representations and sensory fusion},
  journal      = {Neurocomputing},
  volume       = {12},
  number       = {2-3},
  pages        = {267--287},
  year         = {1996},
  url          = {https://doi.org/10.1016/0925-2312(95)00116-6},
  doi          = {10.1016/0925-2312(95)00116-6},
  timestamp    = {Tue, 06 Jun 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ijon/SzepesvariL96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icann/SzepesvariL96,
  author       = {Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  editor       = {von der Malsburg, Christoph and
                  von Seelen, Werner and
                  Vorbr{\"{u}}ggen, Jan C. and
                  Sendhoff, Bernhard},
  title        = {Inverse Dynamics Controllers for Robust Control: Consequences for
                  Neurocontrollers},
  booktitle    = {Artificial Neural Networks - {ICANN} 96, 1996 International Conference,
                  Bochum, Germany, July 16-19, 1996, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {1112},
  pages        = {791--796},
  publisher    = {Springer},
  year         = {1996},
  url          = {https://doi.org/10.1007/3-540-61510-5\_133},
  doi          = {10.1007/3-540-61510-5\_133},
  timestamp    = {Tue, 14 May 2019 10:00:49 +0200},
  biburl       = {https://dblp.org/rec/conf/icann/SzepesvariL96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/LittmanS96,
  author       = {Littman, Michael L. and
                  Szepesv{\'{a}}ri, Csaba},
  editor       = {Saitta, Lorenza},
  title        = {A Generalized Reinforcement-Learning Model: Convergence and Applications},
  booktitle    = {Machine Learning, Proceedings of the Thirteenth International Conference
                  ({ICML} '96), Bari, Italy, July 3-6, 1996},
  pages        = {310--318},
  publisher    = {Morgan Kaufmann},
  year         = {1996},
  timestamp    = {Fri, 23 Dec 2011 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/LittmanS96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/neco/SzepesvariBL94,
  author       = {Szepesv{\'{a}}ri, Csaba and
                  Bal{\'{a}}zs, L{\'{a}}szl{\'{o}} and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Topology Learning Solved by Extended Objects: {A} Neural Network Model},
  journal      = {Neural Comput.},
  volume       = {6},
  number       = {3},
  pages        = {441--458},
  year         = {1994},
  url          = {https://doi.org/10.1162/neco.1994.6.3.441},
  doi          = {10.1162/NECO.1994.6.3.441},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/neco/SzepesvariBL94.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/adb/SzepesvariL93,
  author       = {Szepesv{\'{a}}ri, Csaba and
                  L{\"{o}}rincz, Andr{\'{a}}s},
  title        = {Behavior of an Adaptive Self-organizing Autonomous Agent Working with
                  Cues and Competing Concepts},
  journal      = {Adapt. Behav.},
  volume       = {2},
  number       = {2},
  pages        = {131--160},
  year         = {1993},
  url          = {https://doi.org/10.1177/105971239300200202},
  doi          = {10.1177/105971239300200202},
  timestamp    = {Tue, 25 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/adb/SzepesvariL93.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics