Voir la notice de l'article provenant de la source Math-Net.Ru
@article{MGTA_2023_15_4_a0, author = {Sergey V. Garbar and Alexander V. Kolnogorov and Alexey N. Lazutchenko}, title = {UCB strategies and optimization of batch processing in a one-armed bandit problem}, journal = {Matemati\v{c}eska\^a teori\^a igr i e\"e prilo\v{z}eni\^a}, pages = {3--27}, publisher = {mathdoc}, volume = {15}, number = {4}, year = {2023}, language = {ru}, url = {http://geodesic.mathdoc.fr/item/MGTA_2023_15_4_a0/} }
TY - JOUR AU - Sergey V. Garbar AU - Alexander V. Kolnogorov AU - Alexey N. Lazutchenko TI - UCB strategies and optimization of batch processing in a one-armed bandit problem JO - Matematičeskaâ teoriâ igr i eë priloženiâ PY - 2023 SP - 3 EP - 27 VL - 15 IS - 4 PB - mathdoc UR - http://geodesic.mathdoc.fr/item/MGTA_2023_15_4_a0/ LA - ru ID - MGTA_2023_15_4_a0 ER -
%0 Journal Article %A Sergey V. Garbar %A Alexander V. Kolnogorov %A Alexey N. Lazutchenko %T UCB strategies and optimization of batch processing in a one-armed bandit problem %J Matematičeskaâ teoriâ igr i eë priloženiâ %D 2023 %P 3-27 %V 15 %N 4 %I mathdoc %U http://geodesic.mathdoc.fr/item/MGTA_2023_15_4_a0/ %G ru %F MGTA_2023_15_4_a0
Sergey V. Garbar; Alexander V. Kolnogorov; Alexey N. Lazutchenko. UCB strategies and optimization of batch processing in a one-armed bandit problem. Matematičeskaâ teoriâ igr i eë priloženiâ, Tome 15 (2023) no. 4, pp. 3-27. http://geodesic.mathdoc.fr/item/MGTA_2023_15_4_a0/
[1] Borovkov A.A., Matematicheskaya statistika, Dopolnitelnye glavy: Uchebnoe posobie dlya vuzov, Nauka. Glavnaya redaktsiya fiziko-matematicheskoi literatury, M., 1984
[2] Varshavskii V.I., Kollektivnoe povedenie avtomatov, Nauka, M., 1973
[3] Garbar S.V., Kolnogorov A.V., “Adaptatsiya strategii UCB Dzh. Basera dlya gaussovskogo mnogorukogo bandita”, MTIiP, 14:2 (2022), 3–30 | MR
[4] Kolnogorov A.V., “Robastnoe parallelnoe upravlenie v sluchainoi srede i optimizatsiya obrabotki dannykh”, Avtomat. i telemekh., 2014, no. 12, 42–55 | MR | Zbl
[5] Kolnogorov A.V., “Zadacha ob odnorukom bandite dlya sistem s parallelnoi obrabotkoi dannykh”, Probl. peredachi inform., 51:2 (2015), 99–113 | MR | Zbl
[6] Nazin A.V., Poznyak A.S., Adaptivnyi vybor variantov, Nauka, M., 1986
[7] Presman E.L., Sonin I.M., Posledovatelnoe upravlenie po nepolnym dannym, Nauka, M., 1982
[8] Satton R. S., Barto E. Dzh., Obuchenie s podkrepleniem: vvedenie, 2-e izd., DMK Press, M., 2020
[9] Smirnov D.S., Gromova E.V., “Model prinyatiya reshenii pri nalichii ekspertov kak modifitsirovannaya zadacha o mnogorukom bandite”, MTIP, 9:4 (2017), 69–87 | Zbl
[10] Sragovich V.G., Adaptivnoe upravlenie, Nauka, M., 1981
[11] Tsetlin M.L., Issledovaniya po teorii avtomatov i modelirovaniyu biologicheskikh sistem, Nauka, M., 1969
[12] Auer P., “Using confidence bounds for exploitation-exploration trade-offs”, J. of Machine Learning Research, 3 (2002), 397–422 | MR
[13] Auer P., Cesa-Bianchi N., Fisher P., “Finite-time analysis of the multi-armed bandit problem”, Machine Learning, 47:2–3 (2002), 235–256 | DOI | Zbl
[14] Bather J.A., “The minimax risk for the two-armed bandit problem”, Mathematical Learning Models – Theory and Algorithms, Lecture Notes in Statistics, 20, Springer-Verlag, NY, 1983, 1–11 | DOI | MR
[15] Berry D.A., Fristedt B., Bandit problems: sequential allocation of experiments, Chapman and Hall, London–New York, 1985 | MR | Zbl
[16] Bradt R.N., Johnson S.M., Karlin S., “On sequential designs for maximizing the sum of $n$ observations”, Ann. Math. Statist, 27 (1956), 1060–1074 | DOI | MR | Zbl
[17] Chernoff H., Ray S.N., “A Bayes sequential sampling inspection plan”, Ann. Math. Statist., 36 (1965), 1387–1407 | DOI | MR | Zbl
[18] Chernoff H., “Sequential models for clinical trials”, Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability, v. 4, eds. Le Cam L. M., Neyman J., 1967, 805–812 | MR
[19] Gittins J.C., Multi-armed bandit allocation indices, Wiley-Interscience Series in Systems and Optimization, John Wiley Sons, Ltd, Chichester, 1989 | MR | Zbl
[20] Kaufmann E., “On Bayesian index policies for sequential resource allocation”, Annals of Statist., 46:2 (2018), 842–865 | DOI | MR | Zbl
[21] Kolnogorov A., “Gaussian one-armed bandit problem”, 2021 XVIIth International symposium “Problems of redundancy in information and control systems”, IEEE, M., 2021, 74–79 | DOI | MR
[22] Kolnogorov A.V., “Gaussian one-armed bandit with both unknown parameters”, Siberian Electronic Mathematical Reports, 19:2 (2022), 639–650 | MR
[23] Lai T.L., Levin B., Robbins H., Siegmund D., “Sequential medical trials (stopping rules/asymptotic optimality)”, Proc. Nati. Acad. Sci. USA, 77:6 (1980), 3135–3138 | DOI | MR | Zbl
[24] Lai T.L., “Adaptive treatment allocation and the multi-armed bandit problem”, Annals of Statist., 25 (1987), 1091–1114 | MR
[25] Lattimore T., Szepesvari C., Bandit algorithms, Cambridge University Press, Cambridge, 2020 | Zbl
[26] Lugosi G., Cesa-Bianchi N., Prediction, learning and games, Cambridge University Press, Cambridge, 2006 | MR | Zbl
[27] Perchet V., Rigollet P., Chassang S., Snowberg E., “Batched bandit problems”, Annals of Statist., 44:2 (2016), 660–681 | DOI | MR | Zbl
[28] Vogel W., “An asymptotic minimax theorem for the two-armed bandit problem”, Ann. Math. Stat., 31 (1960), 444–451 | DOI | MR | Zbl