Keywords: partition of the state space; nonconstant optimal average cost; discounted approximations to the risk-sensitive average cost criterion; equality of superior and inferior limit risk-averse average criteria
@comment{Kybernetika 48(1), 2012, pp. 83--104. Accented letters in the author names are brace-wrapped so that classic BibTeX treats each one as a single letter when sorting and building labels.}
@article{KYB_2012_48_1_a4,
  author   = {Alan{\'\i}s-Dur{\'a}n, Alfredo and Cavazos-Cadena, Rolando},
  title    = {An optimality system for finite average {Markov} decision chains under risk-aversion},
  journal  = {Kybernetika},
  pages    = {83--104},
  year     = {2012},
  volume   = {48},
  number   = {1},
  mrnumber = {2932929},
  zbl      = {1243.93127},
  language = {en},
  url      = {http://geodesic.mathdoc.fr/item/KYB_2012_48_1_a4/},
}
TY  - JOUR
AU  - Alanís-Durán, Alfredo
AU  - Cavazos-Cadena, Rolando
TI  - An optimality system for finite average Markov decision chains under risk-aversion
JO  - Kybernetika
PY  - 2012
SP  - 83
EP  - 104
VL  - 48
IS  - 1
UR  - http://geodesic.mathdoc.fr/item/KYB_2012_48_1_a4/
LA  - en
ID  - KYB_2012_48_1_a4
ER  -
Alanís-Durán, Alfredo; Cavazos-Cadena, Rolando. An optimality system for finite average Markov decision chains under risk-aversion. Kybernetika, Tome 48 (2012) no. 1, pp. 83-104. http://geodesic.mathdoc.fr/item/KYB_2012_48_1_a4/
[1] A. Arapostathis, V. K. Borkar, E. Fernández-Gaucherand, M. K. Ghosh, S. I. Marcus: Discrete-time controlled Markov processes with average cost criteria: a survey. SIAM J. Control Optim. 31 (1993), 282-334. | DOI | MR
[2] P. Billingsley: Probability and Measure. Third edition. Wiley, New York 1995. | MR | Zbl
[3] R. Cavazos-Cadena, E. Fernández-Gaucherand: Controlled Markov chains with risk-sensitive criteria: average cost, optimality equations and optimal solutions. Math. Methods Oper. Res. 43 (1999), 121-139. | MR | Zbl
[4] R. Cavazos-Cadena, E. Fernández-Gaucherand: Risk-sensitive control in communicating average Markov decision chains. In: Modelling Uncertainty: An Examination of Stochastic Theory, Methods and Applications (M. Dror, P. L'Ecuyer and F. Szidarovszky, eds.), Kluwer, Boston 2002, pp. 525-544.
[5] R. Cavazos-Cadena: Solution to the risk-sensitive average cost optimality equation in a class of Markov decision processes with finite state space. Math. Methods Oper. Res. 57 (2003), 263-285. | DOI | MR | Zbl
[6] R. Cavazos-Cadena, D. Hernández-Hernández: A characterization of the optimal risk-sensitive average cost in finite controlled Markov chains. Ann. Appl. Probab. 15 (2005), 175-212. | DOI | MR | Zbl
[7] R. Cavazos-Cadena, D. Hernández-Hernández: A system of Poisson equations for a non-constant Varadhan functional on a finite state space. Appl. Math. Optim. 53 (2006), 101-119. | DOI | MR
[8] R. Cavazos-Cadena, F. Salem-Silva: The discounted method and equivalence of average criteria for risk-sensitive Markov decision processes on Borel spaces. Appl. Math. Optim. 61 (2009), 167-190. | DOI | MR
[9] G. B. Di Masi, L. Stettner: Risk-sensitive control of discrete time Markov processes with infinite horizon. SIAM J. Control Optim. 38 (1999), 61-78. | DOI | MR | Zbl
[10] G. B. Di Masi, L. Stettner: Infinite horizon risk sensitive control of discrete time Markov processes with small risk. Syst. Control Lett. 40 (2000), 15-20. | DOI | MR | Zbl
[11] G. B. Di Masi, L. Stettner: Infinite horizon risk sensitive control of discrete time Markov processes under minorization property. SIAM J. Control Optim. 46 (2007), 231-252. | DOI | MR | Zbl
[12] W. H. Fleming, W. M. McEneaney: Risk-sensitive control on an infinite horizon. SIAM J. Control Optim. 33 (1995), 1881-1915. | DOI | MR
[13] F. R. Gantmakher: The Theory of Matrices. Chelsea, London 1959.
[14] D. Hernández-Hernández, S. I. Marcus: Risk-sensitive control of Markov processes in countable state space. Syst. Control Lett. 29 (1996), 147-155. | DOI | MR | Zbl
[15] D. Hernández-Hernández, S. I. Marcus: Existence of risk sensitive optimal stationary policies for controlled Markov processes. Appl. Math. Optim. 40 (1999), 273-285. | DOI | MR | Zbl
[16] R. A. Howard, J. E. Matheson: Risk-sensitive Markov decision processes. Management Sci. 18 (1972), 356-369. | DOI | MR | Zbl
[17] D. H. Jacobson: Optimal stochastic linear systems with exponential performance criteria and their relation to stochastic differential games. IEEE Trans. Automat. Control 18 (1973), 124-131. | DOI | MR
[18] S. C. Jaquette: Markov decision processes with a new optimality criterion: discrete time. Ann. Statist. 1 (1973), 496-505. | DOI | MR
[19] S. C. Jaquette: A utility criterion for Markov decision processes. Management Sci. 23 (1976), 43-49. | DOI | MR | Zbl
[20] A. Jaśkiewicz: Average optimality for risk sensitive control with general state space. Ann. Appl. Probab. 17 (2007), 654-675. | DOI | MR | Zbl
[21] U. G. Rothblum, P. Whittle: Growth optimality for branching Markov decision chains. Math. Oper. Res. 7 (1982), 582-601. | DOI | MR | Zbl
[22] K. Sladký: Successive approximation methods for dynamic programming models. In: Proc. Third Formator Symposium on the Analysis of Large-Scale Systems (J. Beneš and L. Bakule, eds.), Academia, Prague 1979, pp. 171-189. | Zbl
[23] K. Sladký: Bounds on discrete dynamic programming recursions I. Kybernetika 16 (1980), 526-547. | MR | Zbl
[24] K. Sladký: Growth rates and average optimality in risk-sensitive Markov decision chains. Kybernetika 44 (2008), 205-226. | MR | Zbl
[25] K. Sladký, R. Montes-de-Oca: Risk-sensitive average optimality in Markov decision chains. In: Operations Research Proceedings, Vol. 2007, Part III (2008), pp. 69-74. | DOI | Zbl
[26] P. Whittle: Optimization Over Time – Dynamic Programming and Stochastic Control. Wiley, Chichester 1983. | MR
[27] W. H. M. Zijm: Nonnegative Matrices in Dynamic Programming. Mathematical Centre Tract, Amsterdam 1983. | MR | Zbl