@article{TVP_2019_64_3_a1,
author = {M. P. Savelov},
title = {Gittins index for simple family of {Markov} bandit processes with switching cost and no discounting},
journal = {Teori\^a vero\^atnostej i ee primeneni\^a},
pages = {442--455},
year = {2019},
volume = {64},
number = {3},
language = {ru},
url = {http://geodesic.mathdoc.fr/item/TVP_2019_64_3_a1/}
}
TY  - JOUR
AU  - M. P. Savelov
TI  - Gittins index for simple family of Markov bandit processes with switching cost and no discounting
JO  - Teoriâ veroâtnostej i ee primeneniâ
PY  - 2019
SP  - 442
EP  - 455
VL  - 64
IS  - 3
UR  - http://geodesic.mathdoc.fr/item/TVP_2019_64_3_a1/
LA  - ru
ID  - TVP_2019_64_3_a1
ER  - 
M. P. Savelov. Gittins index for simple family of Markov bandit processes with switching cost and no discounting. Teoriâ veroâtnostej i ee primeneniâ, Vol. 64 (2019), no. 3, pp. 442–455. http://geodesic.mathdoc.fr/item/TVP_2019_64_3_a1/