Keywords: semi-Markov decision processes; exponential cost; finite horizon; optimality equation; optimal policy
@article{10_14736_kyb_2022_3_0301,
  author   = {Huo, Haifeng and Wen, Xian},
  title    = {The exponential cost optimality for finite horizon {semi-Markov} decision processes},
  journal  = {Kybernetika},
  year     = {2022},
  volume   = {58},
  number   = {3},
  pages    = {301--319},
  doi      = {10.14736/kyb-2022-3-0301},
  url      = {http://geodesic.mathdoc.fr/articles/10.14736/kyb-2022-3-0301/},
  mrnumber = {4494093},
  zbl      = {07613047},
  language = {en},
}
TY - JOUR AU - Huo, Haifeng AU - Wen, Xian TI - The exponential cost optimality for finite horizon semi-Markov decision processes JO - Kybernetika PY - 2022 SP - 301 EP - 319 VL - 58 IS - 3 UR - http://geodesic.mathdoc.fr/articles/10.14736/kyb-2022-3-0301/ DO - 10.14736/kyb-2022-3-0301 LA - en ID - 10_14736_kyb_2022_3_0301 ER -
%0 Journal Article %A Huo, Haifeng %A Wen, Xian %T The exponential cost optimality for finite horizon semi-Markov decision processes %J Kybernetika %D 2022 %P 301-319 %V 58 %N 3 %U http://geodesic.mathdoc.fr/articles/10.14736/kyb-2022-3-0301/ %R 10.14736/kyb-2022-3-0301 %G en %F 10_14736_kyb_2022_3_0301
Huo, Haifeng; Wen, Xian. The exponential cost optimality for finite horizon semi-Markov decision processes. Kybernetika, Tome 58 (2022) no. 3, pp. 301-319. doi: 10.14736/kyb-2022-3-0301
[1] Bertsekas, D. P., Shreve, S. E.: Stochastic Optimal Control: The Discrete-Time Case. Academic Press, Inc. 1978. | MR
[2] Bäuerle, N., Rieder, U.: Markov Decision Processes with Applications to Finance. Springer, Heidelberg 2011. | MR
[3] Bäuerle, N., Rieder, U.: More risk-sensitive Markov decision processes. Math. Oper. Res. 39 (2014), 105-120. | DOI | MR
[4] Cao, X. R.: Semi-Markov decision problems and performance sensitivity analysis. IEEE Trans. Automat. Control 48 (2003), 758-769. | DOI | MR
[5] Cavazos-Cadena, R., Montes-De-Oca, R.: Optimal stationary policies in risk-sensitive dynamic programs with finite state space and nonnegative rewards. Appl. Math. 27 (2000), 167-185. | DOI | MR
[6] Cavazos-Cadena, R., Montes-De-Oca, R.: Nearly optimal policies in risk-sensitive positive dynamic programming on discrete spaces. Math. Methods Oper. Res. 52 (2000), 133-167. | DOI | MR
[7] Chávez-Rodríguez, S., Cavazos-Cadena, R., Cruz-Suárez, H.: Controlled Semi-Markov chains with risk-sensitive average cost criterion. J. Optim. Theory Appl. 170 (2016), 670-686. | DOI | MR
[8] Chung, K. J., Sobel, M. J.: Discounted MDP's: distribution functions and exponential utility maximization. SIAM J. Control Optim. 25 (1987), 49-62. | DOI | MR
[9] Ghosh, M. K., Saha, S.: Risk-sensitive control of continuous time Markov chains. Stoch. Int. J. Probab. Stoch. Process. 86 (2014), 655-675. | DOI | MR
[10] Guo, X. P., Hernández-Lerma, O.: Continuous-Time Markov Decision Processes: Theory and Applications. Springer-Verlag, Berlin 2009. | MR
[11] Hernández-Lerma, O., Lasserre, J. B.: Discrete-Time Markov Control Processes: Basic Optimality Criteria. Springer-Verlag, New York 1996. | MR
[12] Howard, R. A., Matheson, J. E.: Risk-sensitive Markov decision processes. Management Sci. 18 (1972), 356-369. | DOI | MR
[13] Huang, Y. H., Lian, Z. T., Guo, X. P.: Risk-sensitive semi-Markov decision processes with general utilities and multiple criteria. Adv. Appl. Probab. 50 (2018), 783-804. | DOI | MR
[14] Huang, Y. H., Guo, X. P.: Finite horizon semi-Markov decision processes with application to maintenance systems. Europ. J. Oper. Res. 212 (2011), 131-140. | DOI | MR
[15] Huang, X. X., Zou, X. L., Guo, X. P.: A minimization problem of the risk probability in first passage semi-Markov decision processes with loss rates. Sci. China Math. 58 (2015), 1923-1938. | DOI | MR
[16] Huo, H. F., Wen, X.: First passage risk probability optimality for continuous time Markov decision processes. Kybernetika 55 (2019), 114-133. | DOI | MR
[17] Jaśkiewicz, A.: A note on negative dynamic programming for risk-sensitive control. Oper. Res. Lett. 36 (2008), 531-534. | DOI | MR
[18] Janssen, J., Manca, R.: Semi-Markov Risk Models For Finance, Insurance, and Reliability. Springer, New York 2006. | MR
[19] Jaśkiewicz, A.: On the equivalence of two expected average cost criteria for semi-Markov control processes. Math. Oper. Res. 29 (2004), 326-338. | DOI | MR
[20] Jaquette, S. C.: A utility criterion for Markov decision processes. Management Sci. 23 (1976), 43-49. | DOI | MR
[21] Luque-Vasquez, F., Minjarez-Sosa, J. A.: Semi-Markov control processes with unknown holding times distribution under a discounted criterion. Math. Methods Oper. Res. 61 (2005), 455-468. | DOI | MR
[22] Mamer, J. W.: Successive approximations for finite horizon semi-Markov decision processes with application to asset liquidation. Oper. Res. 34 (1986), 638-644. | DOI | MR
[23] Nollau, V.: Solution of a discounted semi-Markovian decision problem by successive overrelaxation. Optimization 39 (1997), 85-97. | DOI | MR
[24] Puterman, M. L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley and Sons, New York 1994. | MR
[25] Wei, Q.: Continuous-time Markov decision processes with risk-sensitive finite-horizon cost criterion. Math. Methods Oper. Res. 84 (2016), 461-487. | DOI | MR
[26] Wu, X., Guo, X. P.: First passage optimality and variance minimization of Markov decision processes with varying discount factors. J. Appl. Prob. 52 (2015), 441-456. | DOI | MR
[27] Yushkevich, A. A.: On semi-Markov controlled models with average reward criterion. Theory Probab. Appl. 26 (1982), 808-815. | DOI | MR
[28] Zhang, Y.: Continuous-time Markov decision processes with exponential utility. SIAM J. Control Optim. 55 (2017), 2636-2666. | DOI | MR
Cité par Sources :