Keywords: finite state Markov decision processes; discounted and average costs; elimination of suboptimal policies
@article{KYB_2010_46_3_a18,
author = {Sladk\'y, Karel},
title = {Identification of optimal policies in {Markov} decision processes},
journal = {Kybernetika},
pages = {558--570},
year = {2010},
volume = {46},
number = {3},
mrnumber = {2676091},
zbl = {1195.93148},
language = {en},
url = {http://geodesic.mathdoc.fr/item/KYB_2010_46_3_a18/}
}
Sladký, Karel. Identification of optimal policies in Markov decision processes. Kybernetika, Volume 46 (2010) no. 3, pp. 558–570. http://geodesic.mathdoc.fr/item/KYB_2010_46_3_a18/
[1] Cruz-Suárez, D., Montes-de-Oca, R.: Uniform convergence of the value iteration policies for discounted Markov decision processes. Bol. de la Soc. Mat. Mexicana 12 (2006), 133–148. | MR
[2] Cruz-Suárez, D., Montes-de-Oca, R., Salem-Silva, F.: Uniform approximations of discounted Markov decision processes to optimal policies. Proceedings of Prague Stochastics 2006 (M. Hušková and M. Janžura, eds.), Matfyzpress, Prague 2006, pp. 278–287.
[3] Grinold, J.: Elimination of suboptimal actions in Markov decision problems. Oper. Res. 21 (1973), 848–851. | DOI | MR | Zbl
[4] Hastings, N. A. J.: Bounds on the gain of a Markov decision process. Oper. Res. 19 (1971), 240–243. | DOI
[5] Hastings, N. A. J., Mello, J.: Tests for suboptimal actions in discounted Markov programming. Manag. Sci. 19 (1971), 1019–1022. | DOI | MR | Zbl
[6] Hastings, N. A. J., Mello, J.: Tests for suboptimal actions in undiscounted Markov decision chains. Manag. Sci. 23 (1976), 87–91. | DOI | MR
[7] MacQueen, J.: A modified dynamic programming method for Markov decision problems. J. Math. Anal. Appl. 14 (1966), 38–43. | DOI | MR
[8] MacQueen, J.: A test of suboptimal actions in Markovian decision problems. Oper. Res. 15 (1967), 559–561. | DOI
[9] Odoni, A. R.: On finding the maximal gain for Markov decision processes. Oper. Res. 17 (1969), 857–860. | DOI | MR | Zbl
[10] Puterman, M. L., Shin, M. C.: Modified policy iteration algorithms for discounted Markov decision problems. Manag. Sci. 24 (1978), 1127–1137. | DOI | MR | Zbl
[11] Puterman, M. L., Shin, M. C.: Action elimination procedures for modified policy iteration algorithm. Oper. Res. 30 (1982), 301–318. | DOI | MR
[12] Puterman, M. L.: Markov Decision Processes – Discrete Stochastic Dynamic Programming. Wiley, New York 1994. | MR | Zbl
[13] Sladký, K.: O metodě postupných aproximací pro nalezení optimálního řízení markovského řetězce (On successive approximation method for finding optimal control of a Markov chain). Kybernetika 4 (1969), 2, 167–176.
[14] White, D. J.: Dynamic programming, Markov chains and the method of successive approximation. J. Math. Anal. Appl. 6 (1963), 296–306. | DOI | MR