Voir la notice de l'article provenant de la source Math-Net.Ru
@article{MGTA_2021_13_2_a1, author = {Alexander V. Kolnogorov and Alexander V. Nazin and Dmitry N. Shiyan}, title = {Two-armed bandit problem and batch version of the mirror descent algorithm}, journal = {Matemati\v{c}eska\^a teori\^a igr i e\"e prilo\v{z}eni\^a}, pages = {9--39}, publisher = {mathdoc}, volume = {13}, number = {2}, year = {2021}, language = {ru}, url = {http://geodesic.mathdoc.fr/item/MGTA_2021_13_2_a1/} }
TY - JOUR AU - Alexander V. Kolnogorov AU - Alexander V. Nazin AU - Dmitry N. Shiyan TI - Two-armed bandit problem and batch version of the mirror descent algorithm JO - Matematičeskaâ teoriâ igr i eë priloženiâ PY - 2021 SP - 9 EP - 39 VL - 13 IS - 2 PB - mathdoc UR - http://geodesic.mathdoc.fr/item/MGTA_2021_13_2_a1/ LA - ru ID - MGTA_2021_13_2_a1 ER -
%0 Journal Article %A Alexander V. Kolnogorov %A Alexander V. Nazin %A Dmitry N. Shiyan %T Two-armed bandit problem and batch version of the mirror descent algorithm %J Matematičeskaâ teoriâ igr i eë priloženiâ %D 2021 %P 9-39 %V 13 %N 2 %I mathdoc %U http://geodesic.mathdoc.fr/item/MGTA_2021_13_2_a1/ %G ru %F MGTA_2021_13_2_a1
Alexander V. Kolnogorov; Alexander V. Nazin; Dmitry N. Shiyan. Two-armed bandit problem and batch version of the mirror descent algorithm. Matematičeskaâ teoriâ igr i eë priloženiâ, Tome 13 (2021) no. 2, pp. 9-39. http://geodesic.mathdoc.fr/item/MGTA_2021_13_2_a1/
[1] Borovkov A. A., Matematicheskaya statistika. Dopolnitelnye glavy, Uchebnoe posobie dlya vuzov, Nauka. Glavnaya redaktsiya fiziko-matematicheskoi literatury, M., 1984
[2] Varshavskii V. I., Kollektivnoe povedenie avtomatov, Nauka, M., 1973
[3] Gasnikov A. V., Nesterov Yu. E., Spokoinyi V. G., “Ob effektivnosti odnogo metoda randomizatsii zerkalnogo spuska v zadachakh onlain optimizatsii”, Zhurnal vychislitelnoi matematiki i matematicheskoi fiziki, 55:4 (2015), 582–598 | Zbl
[4] Kolnogorov A. V., “Gaussovskii dvurukii bandit i optimizatsiya gruppovoi obrabotki dannykh”, Probl. peredachi inform., 54:1 (2018), 93–111 | MR | Zbl
[5] Kolnogorov A. V., “Gaussovskii dvurukii bandit: predelnoe opisanie”, Probl. peredachi inform., 56:3 (2020), 86–111 | MR | Zbl
[6] Nazin A. V., Poznyak A. S., Adaptivnyi vybor variantov, Nauka, M., 1986
[7] Nemirovskii A. S., Yudin D. B., “Effektivnye metody resheniya zadach vypuklogo programmirovaniya bolshoi razmernosti”, Ekonomika i matematicheskie metody, 15:1 (1979), 135–152 | MR | Zbl
[8] Presman E. L., Sonin I. M., Posledovatelnoe upravlenie po nepolnym dannym, Nauka, M., 1982
[9] Smirnov D. S., Gromova E. V., “Model prinyatiya reshenii pri nalichii ekspertov kak modifitsirovannaya zadacha o mnogorukom bandite”, MTIP, 9:4 (2017), 69–87 | Zbl
[10] Sragovich V. G., Adaptivnoe upravlenie, Nauka, M., 1981
[11] Tsetlin M. L., Issledovaniya po teorii avtomatov i modelirovaniyu biologicheskikh sistem, Nauka, M., 1969
[12] Auer P., “Using Confidence Bounds for Exploitation-Exploration Trade-offs”, Journal of Machine Learning Research, 3 (2002), 397–422 | MR
[13] Auer P., Cesa-Bianchi N., Fischer P., “Finite-time analysis of the multi-armed bandit problem”, Machine learning, 47:2–3 (2002), 235–256 | DOI | Zbl
[14] Bather J. A., “The Minimax Risk for the Two-Armed Bandit Problem”, Mathematical Learning Models – Theory and Algorithms, Lecture Notes in Statistics, 20, Springer-Verlag, New York, 1983, 1–11 | DOI | MR
[15] Berry D. A., Fristedt B., Bandit Problems: Sequential Allocation of Experiments, Chapman and Hall, London–New York, 1985 | MR | Zbl
[16] Fabius J., van Zwet W. R., “Some Remarks on the Two-Armed Bandit”, Ann. Math. Stat., 41 (1970), 1906–1916 | DOI | MR | Zbl
[17] Juditsky A., Nazin A. V., Tsybakov A. B., Vayatis N., “Gap-Free Bounds for Stochastic Multi-Armed Bandit”, Proc. 17th World Congress IFAC (Seoul, Korea, July 6–11), 2008, 11560–11563
[18] Kaufmann E., “On Bayesian Index Policies for Sequential Resource Allocation”, Annals of Statistics, 46:2 (2018), 842–865 | DOI | MR | Zbl
[19] Lai T. L., Levin B., Robbins H., Siegmund D., “Sequential medical trials (stopping rules/asymptotic optimality)”, Proc. Nati. Acad. Sci. USA, 77:6 (1980), 3135–3138 | DOI | MR | Zbl
[20] Lattimore T., Szepesvari C., Bandit Algorithms, Cambridge University Press, Cambridge, 2020 | Zbl
[21] Lai T. L., Robbins H., “Asymptotically Efficient Adaptive Allocation Rules”, Advances in Applied Mathematics, 6 (1985), 4–22 | DOI | MR | Zbl
[22] Lugosi G., Cesa-Bianchi N., Prediction, Learning and Games, Cambridge University Press, Cambridge, 2006 | MR | Zbl
[23] Robbins H., “Some Aspects of the Sequential Design of Experiments”, Bulletin AMS, 58:5 (1952), 527–535 | DOI | MR | Zbl
[24] Vogel W., “An Asymptotic Minimax Theorem for the Two-Armed Bandit Problem”, Ann. Math. Stat., 31 (1960), 444–451 | DOI | MR | Zbl