Voir la notice de l'article provenant de la source Math-Net.Ru
@article{MGTA_2022_14_2_a0, author = {Sergey V. Garbar and Alexander V. Kolnogorov}, title = {Customization of {J.~Bather} {UCB} strategy for a {Gaussian} multi-armed bandit}, journal = {Matemati\v{c}eska\^a teori\^a igr i e\"e prilo\v{z}eni\^a}, pages = {3--30}, publisher = {mathdoc}, volume = {14}, number = {2}, year = {2022}, language = {ru}, url = {http://geodesic.mathdoc.fr/item/MGTA_2022_14_2_a0/} }
TY - JOUR AU - Sergey V. Garbar AU - Alexander V. Kolnogorov TI - Customization of J.~Bather UCB strategy for a Gaussian multi-armed bandit JO - Matematičeskaâ teoriâ igr i eë priloženiâ PY - 2022 SP - 3 EP - 30 VL - 14 IS - 2 PB - mathdoc UR - http://geodesic.mathdoc.fr/item/MGTA_2022_14_2_a0/ LA - ru ID - MGTA_2022_14_2_a0 ER -
%0 Journal Article %A Sergey V. Garbar %A Alexander V. Kolnogorov %T Customization of J.~Bather UCB strategy for a Gaussian multi-armed bandit %J Matematičeskaâ teoriâ igr i eë priloženiâ %D 2022 %P 3-30 %V 14 %N 2 %I mathdoc %U http://geodesic.mathdoc.fr/item/MGTA_2022_14_2_a0/ %G ru %F MGTA_2022_14_2_a0
Sergey V. Garbar; Alexander V. Kolnogorov. Customization of J.~Bather UCB strategy for a Gaussian multi-armed bandit. Matematičeskaâ teoriâ igr i eë priloženiâ, Tome 14 (2022) no. 2, pp. 3-30. http://geodesic.mathdoc.fr/item/MGTA_2022_14_2_a0/
[1] Borovkov A. A., Matematicheskaya statistika. Dopolnitelnye glavy, Uchebnoe posobie dlya vuzov, Nauka. Glavnaya redaktsiya fiziko-matematicheskoi literatury, M., 1984
[2] Varshavskii V. I., Kollektivnoe povedenie avtomatov, Nauka, M., 1973
[3] Kolnogorov A. V., “Gaussovskii dvurukii bandit i optimizatsiya gruppovoi obrabotki dannykh”, Probl. peredachi inform., 54:1 (2018), 93–111 | MR | Zbl
[4] Kolnogorov A. V., Garbar S. V., “Mashinnoe obuchenie, zadacha o mnogorukom bandite i paketnoe pravilo UCB”, XIII Vserossiiskoe soveschanie po problemam upravleniya, VSPU-2019 (17–20 iyunya 2019 g., Moskva), ed. D.A. Novikov, IPU RAN, M., 2019, 2120–2124
[5] Kolnogorov A. V., “Gaussovskii dvurukii bandit: predelnoe opisanie”, Probl. peredachi inform., 56:3 (2020), 86–111 | MR | Zbl
[6] Kolnogorov A.V, Nazin A. V., Shiyan D. N., “Zadacha o dvurukom bandite i paketnaya versiya algoritma zerkalnogo spuska”, MTIP, 13:2 (2021), 9–39 | MR | Zbl
[7] Nazin A. V., Poznyak A. S., Adaptivnyi vybor variantov, Nauka, M., 1986
[8] Presman E. L., Sonin I. M., Posledovatelnoe upravlenie po nepolnym dannym, Nauka, M., 1982
[9] Samarskii A. A., Teoriya raznostnykh skhem, Nauka, M., 1989
[10] Satton R. S., Barto E. Dzh., Obuchenie s podkrepleniem: Vvedenie, 2-e izd., DMK Press, M., 2020
[11] Smirnov D. S., Gromova E. V., “Model prinyatiya reshenii pri nalichii ekspertov kak modifitsirovannaya zadacha o mnogorukom bandite”, MTIP, 9:4 (2017), 69–87 | Zbl
[12] Sragovich V. G., Adaptivnoe upravlenie, Nauka, M., 1981
[13] Tsetlin M. L., Issledovaniya po teorii avtomatov i modelirovaniyu biologicheskikh sistem, Nauka, M., 1969
[14] Auer P., “Using Confidence Bounds for Exploitation-Exploration Trade-offs”, Journal of Machine Learning Research, 3 (2002), 397–422 | MR
[15] Auer P., Cesa-Bianchi N., Fischer P., “Finite-time analysis of the multi-armed bandit problem”, Machine learning, 47:2–3 (2002), 235–256 | DOI | Zbl
[16] Bather J. A., “The Minimax Risk for the Two-Armed Bandit Problem”, Mathematical Learning Models – Theory and Algorithms, Lecture Notes in Statistics, 20, Springer-Verlag, New York, 1983, 1–11 | DOI | MR
[17] Berry D. A., Fristedt B., Bandit Problems: Sequential Allocation of Experiments, Chapman and Hall, London–New York, 1985 | MR | Zbl
[18] Garbar S. V., Kolnogorov A. V., “Invariant description for regret of UCB strategy for Gaussian multi-arm bandit”, Proceedings of the 6th Stochastic Modeling Techniques and Data Analysis International Conference with Demographics Workshop, SMTDA 2020 (2–5 June 2020), 251–260
[19] Gittins J. C., Multi-Armed Bandit Allocation Indices, Wiley-Interscience Series in Systems and Optimization, John Wiley Sons, Ltd, Chichester, 1989 | MR | Zbl
[20] Kaufmann E., “On Bayesian Index Policies for Sequential Resource Allocation”, Annals of Statistics, 46:2 (2018), 842–865 | DOI | MR | Zbl
[21] Lai T. L., “Adaptive Treatment Allocation and the Multi-Armed Bandit Problem”, Annals of Statist., 25 (1987), 1091–1114 | MR
[22] Lai T. L., Levin B., Robbins H., Siegmund D., “Sequential medical trials (stopping rules/asymptotic optimality)”, Proc. Nati. Acad. Sci. USA, 77:6 (1980), 3135–3138 | DOI | MR | Zbl
[23] Lugosi G., Cesa-Bianchi N., Prediction, Learning and Games, Cambridge University Press, Cambridge, 2006 | MR | Zbl
[24] Perchet V., Rigollet P., Chassang S., Snowberg E., “Batched Bandit Problems”, Annals of Statistics, 44:2 (2016), 660–681 | DOI | MR | Zbl
[25] Lattimore T., Szepesvari C., Bandit Algorithms, Cambridge University Press, Cambridge, 2020 | Zbl
[26] Vogel W., “An Asymptotic Minimax Theorem for the Two-Armed Bandit Problem”, Ann. Math. Stat., 31 (1960), 444–451 | DOI | MR | Zbl