Keywords: discrete-time Markov decision processes; average reward criterion; optimal stationary policy; Lyapunov-type condition; unbounded reward/cost function
@article{10_14736_kyb_2015_2_0276,
author = {Zou, Xiaolong and Guo, Xianping},
title = {Another set of verifiable conditions for average {Markov} decision processes with {Borel} spaces},
journal = {Kybernetika},
pages = {276--292},
year = {2015},
volume = {51},
number = {2},
doi = {10.14736/kyb-2015-2-0276},
mrnumber = {3350562},
zbl = {06487079},
language = {en},
url = {http://geodesic.mathdoc.fr/articles/10.14736/kyb-2015-2-0276/}
}
TY - JOUR
AU - Zou, Xiaolong
AU - Guo, Xianping
TI - Another set of verifiable conditions for average Markov decision processes with Borel spaces
JO - Kybernetika
PY - 2015
SP - 276
EP - 292
VL - 51
IS - 2
UR - http://geodesic.mathdoc.fr/articles/10.14736/kyb-2015-2-0276/
DO - 10.14736/kyb-2015-2-0276
LA - en
ID - 10_14736_kyb_2015_2_0276
ER -
%0 Journal Article
%A Zou, Xiaolong
%A Guo, Xianping
%T Another set of verifiable conditions for average Markov decision processes with Borel spaces
%J Kybernetika
%D 2015
%P 276-292
%V 51
%N 2
%U http://geodesic.mathdoc.fr/articles/10.14736/kyb-2015-2-0276/
%R 10.14736/kyb-2015-2-0276
%G en
%F 10_14736_kyb_2015_2_0276
Zou, Xiaolong; Guo, Xianping. Another set of verifiable conditions for average Markov decision processes with Borel spaces. Kybernetika, Vol. 51 (2015), No. 2, pp. 276-292. doi: 10.14736/kyb-2015-2-0276
[1] Arapostathis, A., et al.: Discrete time controlled Markov processes with average cost criterion: a survey. SIAM J. Control Optim. 31 (1993), 282-344. | DOI | MR
[2] Casella, G., Berger, R. L.: Statistical Inference. Second edition. Duxbury Thomson Learning 2002.
[3] Dynkin, E. B., Yushkevich, A. A.: Controlled Markov Processes. Springer, New York 1979. | MR
[4] Gordienko, E., Hernández-Lerma, O.: Average cost Markov control processes with weighted norms: existence of canonical policies. Appl. Math. (Warsaw) 23 (1995), 2, 199-218. | MR | Zbl
[5] Guo, X. P., Shi, P.: Limiting average criteria for nonstationary Markov decision processes. SIAM J. Optim. 11 (2001), 4, 1037-1053. | DOI | MR | Zbl
[6] Guo, X. P., Zhu, Q. X.: Average optimality for Markov decision processes in Borel spaces: A new condition and approach. J. Appl. Probab. 43 (2006), 318-334. | DOI | MR | Zbl
[7] Hernández-Lerma, O., Lasserre, J. B.: Discrete-Time Markov Control Processes. Springer, New York 1996. | DOI | MR | Zbl
[8] Hernández-Lerma, O., Lasserre, J. B.: Further Topics on Discrete-Time Markov Control Processes. Springer, New York 1999. | DOI | MR | Zbl
[9] Kakumanu, M.: Nondiscounted continuous time Markov decision process with countable state space. SIAM J. Control Optim. 10 (1972), 1, 210-220. | DOI | MR
[10] Lund, R. B., Tweedie, R. L.: Geometric convergence rates for stochastically ordered Markov chains. Math. Oper. Res. 21 (1996), 1, 182-194. | DOI | MR | Zbl
[11] Meyn, S. P., Tweedie, R. L.: Markov Chains and Stochastic Stability. Cambridge Univ. Press, New York 2009. | DOI | MR | Zbl
[12] Puterman, M. L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley, New York 1994. | DOI | MR | Zbl
[13] Sennott, L. I.: Average reward optimization theory for denumerable state spaces. In: Handbook of Markov Decision Processes (Int. Ser. Operat. Res. Manag. Sci. 40) (E. A. Feinberg and A. Shwartz, eds.), Kluwer, Boston 2002, pp. 153-172. | DOI | MR | Zbl
[14] Sennott, L. I.: Stochastic Dynamic Programming and the Control of Queueing Systems. Wiley, New York 1999. | DOI | MR | Zbl
[15] Zhu, Q. X.: Average optimality for continuous-time jump Markov decision processes with a policy iteration approach. J. Math. Anal. Appl. 339 (2008), 1, 691-704. | DOI | MR