See the record of this article from the source Math-Net.Ru
@article{MGTA_2017_9_4_a4,
     author = {Dmitriy S. Smirnov and Ekaterina V. Gromova},
     title = {Decision-making model under presence of experts as a modified multi-armed bandit problem},
     journal = {Matemati\v{c}eska\^a teori\^a igr i e\"e prilo\v{z}eni\^a},
     pages = {69--87},
     publisher = {mathdoc},
     volume = {9},
     number = {4},
     year = {2017},
     language = {ru},
     url = {http://geodesic.mathdoc.fr/item/MGTA_2017_9_4_a4/}
}
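As a usage note, here is a minimal LaTeX sketch showing how the BibTeX record above could be cited; the file name references.bib is illustrative and not part of the source record.

\documentclass{article}
\begin{document}
% Cite the record by the key from the BibTeX entry above
Smirnov and Gromova study a modified multi-armed bandit
problem in the presence of experts~\cite{MGTA_2017_9_4_a4}.
\bibliographystyle{plain}
\bibliography{references} % references.bib contains the entry above
\end{document}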
TY - JOUR
AU - Dmitriy S. Smirnov
AU - Ekaterina V. Gromova
TI - Decision-making model under presence of experts as a modified multi-armed bandit problem
JO - Matematičeskaâ teoriâ igr i eë priloženiâ
PY - 2017
SP - 69
EP - 87
VL - 9
IS - 4
PB - mathdoc
UR - http://geodesic.mathdoc.fr/item/MGTA_2017_9_4_a4/
LA - ru
ID - MGTA_2017_9_4_a4
ER -
%0 Journal Article
%A Dmitriy S. Smirnov
%A Ekaterina V. Gromova
%T Decision-making model under presence of experts as a modified multi-armed bandit problem
%J Matematičeskaâ teoriâ igr i eë priloženiâ
%D 2017
%P 69-87
%V 9
%N 4
%I mathdoc
%U http://geodesic.mathdoc.fr/item/MGTA_2017_9_4_a4/
%G ru
%F MGTA_2017_9_4_a4
Dmitriy S. Smirnov; Ekaterina V. Gromova. Decision-making model under presence of experts as a modified multi-armed bandit problem. Matematičeskaâ teoriâ igr i eë priloženiâ, Volume 9 (2017) no. 4, pp. 69-87. http://geodesic.mathdoc.fr/item/MGTA_2017_9_4_a4/
[1] Borovkov A. A., Matematicheskaya statistika: dopolnitelnye glavy, Nauka, M., 1984
[2] Bure V. M., Parilina E. M., Teoriya veroyatnostei i matematicheskaya statistika, Lan, M., 2013, 416 pp.
[3] Lazutchenko A. N., “O robastnom upravlenii v sluchainoi srede, kharakterizuemoi normalnym raspredeleniem dokhodov s razlichnymi dispersiyami”, Trudy Karelskogo nauchnogo tsentra Rossiiskoi akademii nauk, 2015, no. 10
[4] Smirnov D. S., “Testirovanie internet-stranits kak reshenie zadachi o mnogorukom bandite”, Molodoi uchenyi, 2015, no. 19, 78–86
[5] Smirnov D. S., “Ispolzovanie zadachi o mnogorukom bandite v testirovanii veb-stranits”, Protsessy upravleniya i ustoichivost, 3:1 (2016), 705–710
[6] Smirnov D. S., “Zadacha o mnogorukom bandite pri nalichii eksperta”, Protsessy upravleniya i ustoichivost, 4(20):1 (2017), 681–685
[7] Auer P., Cesa-Bianchi N., Fischer P., “Finite-time Analysis of the Multiarmed Bandit Problem”, Machine Learning, 47:2–3 (2002), 235–256 | DOI
[8] Auer P. et al., “The nonstochastic multiarmed bandit problem”, SIAM Journal on Computing, 32:1 (2002), 48–77 | DOI | MR
[9] Awerbuch B., Kleinberg R., “Online linear optimization and adaptive routing”, Journal of Computer and System Sciences, 74:1 (2008), 97–114 | DOI | MR
[10] Bather J. A., “The Minimax Risk for the Two-Armed Bandit Problem”, Mathematical Learning Models – Theory and Algorithms, Lecture Notes in Statistics, 20, Springer-Verlag, New York Inc., 1983, 1–11 | DOI | MR
[11] Chu W. et al., “Contextual Bandits with Linear Payoff Functions”, AISTATS, 15 (2011), 208–214
[12] Hardwick J. et al., “Bandit strategies for ethical sequential allocation”, Computing Science and Statistics, 23:6.1 (1991), 421–424
[13] Kuleshov V., Precup D., “Algorithms for the multi-armed bandit problem”, Journal of Machine Learning Research, 2000, 1–48
[14] Lage R. et al., “Choosing which message to publish on social networks: A contextual bandit approach”, 2013 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM), IEEE, 2013, 620–627
[15] Lai T. L., Robbins H., “Asymptotically efficient adaptive allocation rules”, Advances in Applied Mathematics, 6 (1985), 4–22 | DOI | MR
[16] Lai T. L., “Adaptive treatment allocation and the multi-armed bandit problem”, The Annals of Statistics, 15 (1987), 1091–1114 | DOI | MR
[17] Langford J., Zhang T., “The epoch-greedy algorithm for multi-armed bandits with side information”, Advances in neural information processing systems, 2008, 817–824
[18] Li L. et al., “A contextual-bandit approach to personalized news article recommendation”, Proceedings of the 19th International Conference on World Wide Web, ACM, 2010, 661–670
[19] Lu T., Pal D., Pal M., “Contextual Multi-Armed Bandits”, AISTATS, 2010, 485–492
[20] Pandey S., Olston C., “Handling advertisements of unknown quality in search advertising”, NIPS, 20 (2006), 1065–1072
[21] Robbins H., “Some aspects of the sequential design of experiments”, Herbert Robbins Selected Papers, Springer, New York, 1985, 169–177 | MR
[22] Schwartz E. M., Misra K., Abernethy J., Dynamic Online Pricing with Incomplete Information Using Multi-Armed Bandit Experiments, 2016
[23] Shen W. et al., “Portfolio Choices with Orthogonal Bandit Learning”, IJCAI, 2015, 974–980
[24] Strehl A. L. et al., “Experience-efficient learning in associative bandit problems”, Proceedings of the 23rd International Conference on Machine Learning, ACM, 2006, 889–896
[25] Sutton R. S., Barto A. G., Reinforcement learning: An introduction, v. 1, MIT Press, Cambridge, 1998
[26] Thompson W. R., “On the likelihood that one unknown probability exceeds another in view of the evidence of two samples”, Biometrika, 25:3/4 (1933), 285–294 | DOI
[27] Woodroofe M., “A one-armed bandit problem with a concomitant variable”, Journal of the American Statistical Association, 74:368 (1979), 799–806 | DOI | MR