Keywords: controlled Markov processes; finite state space; asymptotic behavior; risk-sensitive average optimality
@article{10_14736_kyb_2018_6_1218,
author = {Sladk\'y, Karel},
title = {Risk-sensitive average optimality in {Markov} decision processes},
journal = {Kybernetika},
pages = {1218--1230},
year = {2018},
volume = {54},
number = {6},
doi = {10.14736/kyb-2018-6-1218},
mrnumber = {3902630},
zbl = {07031770},
language = {en},
url = {http://geodesic.mathdoc.fr/articles/10.14736/kyb-2018-6-1218/}
}
Sladký, Karel. Risk-sensitive average optimality in Markov decision processes. Kybernetika, Volume 54 (2018) no. 6, pp. 1218-1230. doi: 10.14736/kyb-2018-6-1218
[1] Arapostathis, A., Borkar, V. S., Fernández-Gaucherand, E., Ghosh, M. K., Marcus, S. I.: Discrete-time controlled Markov processes with average cost criterion: A survey. SIAM J. Control Optim. 31 (1993), 282-344. | DOI | MR
[2] Bather, J.: Optimal decision procedures for finite Markov chains, Part II. Adv. Appl. Probab. 5 (1973), 328-339. | DOI | MR
[3] Bielecki, T. R., Hernández-Hernández, D., Pliska, S. R.: Risk-sensitive control of finite state Markov chains in discrete time, with applications to portfolio management. Math. Methods Oper. Res. 50 (1999), 167-188. | DOI | MR
[4] Cavazos-Cadena, R.: Value iteration and approximately optimal stationary policies in finite-state average Markov chains. Math. Methods Oper. Res. 56 (2002), 181-196. | DOI | MR
[5] Cavazos-Cadena, R.: Solution to the risk-sensitive average cost optimality equation in a class of Markov decision processes with finite state space. Math. Methods Oper. Res. 57 (2003), 2, 263-285. | DOI | MR
[6] Cavazos-Cadena, R.: Solution of the average cost optimality equation for finite Markov decision chains: risk-sensitive and risk-neutral criteria. Math. Methods Oper. Res. 70 (2009), 541-566. | DOI | MR
[7] Cavazos-Cadena, R., Fernández-Gaucherand, E.: Controlled Markov chains with risk-sensitive criteria: average cost, optimality equations and optimal solutions. Math. Methods Oper. Res. 43 (1999), 121-139. | MR
[8] Cavazos-Cadena, R., Hernández-Hernández, D.: A characterization of exponential functionals in finite Markov chains. Math. Methods Oper. Res. 60 (2004), 399-414. | DOI | MR
[9] Cavazos-Cadena, R., Hernández-Hernández, D.: A characterization of the optimal risk-sensitive average cost in finite controlled Markov chains. Ann. Appl. Probab. 15 (2005), 175-212. | DOI | MR
[10] Cavazos-Cadena, R., Hernández-Hernández, D.: Necessary and sufficient conditions for a solution to the risk-sensitive Poisson equation on a finite state space. Systems Control Lett. 58 (2009), 254-258. | DOI | MR
[11] Cavazos-Cadena, R., Montes-de-Oca, R.: The value iteration algorithm in risk-sensitive average Markov decision chains with finite state space. Math. Oper. Res. 28 (2003), 752-756. | DOI | MR
[12] Cavazos-Cadena, R., Montes-de-Oca, R.: Nonstationary value iteration in controlled Markov chains with risk-sensitive average criterion. J. Appl. Probab. 42 (2005), 905-918. | DOI | MR
[13] Cavazos-Cadena, R., Feinberg, E. A., Montes-de-Oca, R.: A note on the existence of optimal policies in total reward dynamic programs with compact action sets. Math. Oper. Res. 25 (2000), 657-666. | DOI | MR
[14] Gantmakher, F. R.: The Theory of Matrices. Chelsea, New York 1959. | MR
[15] Howard, R. A.: Dynamic Programming and Markov Processes. MIT Press, Cambridge, Mass. 1960. | MR
[16] Howard, R. A., Matheson, J.: Risk-sensitive Markov decision processes. Manag. Sci. 18 (1972), 356-369. | DOI | MR
[17] Mandl, P.: On the variance in controlled Markov chains. Kybernetika 7 (1971), 1-12. | MR | Zbl
[18] Mandl, P.: Estimation and control in Markov chains. Adv. Appl. Probab. 6 (1974), 40-60. | DOI | MR
[19] Markowitz, H.: Portfolio selection. J. Finance 7 (1952), 77-91. | DOI | MR
[20] Markowitz, H.: Portfolio Selection - Efficient Diversification of Investments. Wiley, New York 1959. | MR
[21] Puterman, M. L.: Markov Decision Processes - Discrete Stochastic Dynamic Programming. Wiley, New York 1994. | DOI | MR
[22] Ross, S. M.: Introduction to Stochastic Dynamic Programming. Academic Press, New York 1983. | MR
[23] Sladký, K.: Necessary and sufficient optimality conditions for average reward of controlled Markov chains. Kybernetika 9 (1973), 124-137. | MR
[24] Sladký, K.: On the set of optimal controls for Markov chains with rewards. Kybernetika 10 (1974), 526-547. | MR
[25] Sladký, K.: Growth rates and average optimality in risk-sensitive Markov decision chains. Kybernetika 44 (2008), 205-226. | MR
[26] Sladký, K.: Risk-sensitive and average optimality in Markov decision processes. In: Proc. 30th Int. Conf. Math. Meth. Economics 2012, Part II (J. Ramík and D. Stavárek, eds.), Silesian University, School of Business Administration, Karviná 2012, pp. 799-804. | DOI
[27] Sladký, K.: Risk-sensitive and mean variance optimality in Markov decision processes. Acta Oeconomica Pragensia 7 (2013), 146-161.
[28] Dijk, N. M. van, Sladký, K.: On the total reward variance for continuous-time Markov reward chains. J. Appl. Probab. 43 (2006), 1044-1052. | DOI | MR