Keywords: discounted Markov decision processes; dynamic programming; unique optimal policy; non-uniqueness of optimal policies; Ekeland's variational principle
% Kybernetika 52(1), 2016, pp. 66--75. Accented letters in author names are
% written as brace-wrapped accent commands (e.g. {\'e}) so that classic BibTeX
% treats each as a single letter when sorting and building labels.
@article{10_14736_kyb_2016_1_0066,
author = {Ortega-Guti{\'e}rrez, R. Israel and Montes-de-Oca, Ra{\'u}l and Lemus-Rodr{\'\i}guez, Enrique},
title = {Uniqueness of optimal policies as a generic property of discounted {Markov} decision processes: {Ekeland's} variational principle approach},
journal = {Kybernetika},
pages = {66--75},
year = {2016},
volume = {52},
number = {1},
doi = {10.14736/kyb-2016-1-0066},
mrnumber = {3482611},
zbl = {1374.90407},
language = {en},
url = {http://geodesic.mathdoc.fr/articles/10.14736/kyb-2016-1-0066/}
}
TY  - JOUR
AU  - Ortega-Gutiérrez, R. Israel
AU  - Montes-de-Oca, Raúl
AU  - Lemus-Rodríguez, Enrique
TI  - Uniqueness of optimal policies as a generic property of discounted Markov decision processes: Ekeland's variational principle approach
JO  - Kybernetika
PY  - 2016
SP  - 66
EP  - 75
VL  - 52
IS  - 1
UR  - http://geodesic.mathdoc.fr/articles/10.14736/kyb-2016-1-0066/
DO  - 10.14736/kyb-2016-1-0066
LA  - en
ID  - 10_14736_kyb_2016_1_0066
ER  -
%0 Journal Article
%A Ortega-Gutiérrez, R. Israel
%A Montes-de-Oca, Raúl
%A Lemus-Rodríguez, Enrique
%T Uniqueness of optimal policies as a generic property of discounted Markov decision processes: Ekeland's variational principle approach
%J Kybernetika
%D 2016
%P 66-75
%V 52
%N 1
%U http://geodesic.mathdoc.fr/articles/10.14736/kyb-2016-1-0066/
%R 10.14736/kyb-2016-1-0066
%G en
%F 10_14736_kyb_2016_1_0066
Ortega-Gutiérrez, R. Israel; Montes-de-Oca, Raúl; Lemus-Rodríguez, Enrique. Uniqueness of optimal policies as a generic property of discounted Markov decision processes: Ekeland's variational principle approach. Kybernetika, Tome 52 (2016) no. 1, pp. 66-75. doi: 10.14736/kyb-2016-1-0066
[1] Bertsekas, D. P.: Dynamic Programming: Deterministic and Stochastic Models. Prentice-Hall, NJ 1987. | MR | Zbl
[2] Bishop, E., Phelps, R. R.: The support functionals of a convex set. In: Proc. Sympos. Pure Math. Vol. VII, 1963 (V. L. Klee, ed.), Amer. Math. Soc., pp. 27-35. | DOI | MR | Zbl
[3] Borwein, J. M., Zhu, Q. J.: Techniques of Variational Analysis. Springer, New York 2005. | MR | Zbl
[4] Cruz-Suárez, D., Montes-de-Oca, R., Salem-Silva, F.: Conditions for the uniqueness of optimal policies of discounted Markov decision processes. Math. Methods Oper. Res. 60 (2004), 415-436. | DOI | MR | Zbl
[5] Cruz-Suárez, D., Montes-de-Oca, R.: Uniform convergence of the value iteration policies for discounted Markov decision processes. Bol. Soc. Mat. Mexicana 12 (2006), 133-152. | MR
[6] Ekeland, I.: On the variational principle. J. Math. Anal. Appl. 47 (1974), 324-353. | DOI | MR | Zbl
[7] Hernández-Lerma, O., Lasserre, J. B.: Discrete-Time Markov Control Processes: Basic Optimality Criteria. Springer-Verlag, New York 1996. | DOI | MR | Zbl
[8] Lucchetti, R.: Convexity and Well-Posed Problems. CMS Books in Mathematics, Springer, New York 2006. | DOI | MR | Zbl
[9] Montes-de-Oca, R., Lemus-Rodríguez, E.: An unbounded Berge's minimum theorem with applications to discounted Markov decision processes. Kybernetika 48 (2012), 268-286. | MR | Zbl
[10] Montes-de-Oca, R., Lemus-Rodríguez, E., Salem-Silva, F.: Nonuniqueness versus uniqueness of optimal policies in convex discounted Markov decision processes. J. Appl. Math. 2013 (2013), 1-5. | DOI | MR | Zbl
[11] Rockafellar, R. T., Wets, R. J. B.: Variational Analysis. Springer, New York 2004. | MR | Zbl
[12] Tanaka, K., Hosino, M., Kuroiwa, D.: On an $\varepsilon$-optimal policy of discrete time stochastic control processes. Bull. Inform. Cybernet. 27 (1995), 107-119. | MR
Cité par Sources :