Keywords: Markov games; empirical estimation; discounted and average criteria
% Kybernetika 53(4), 2017, pp. 694--716. Accented letters in author names use
% the brace-wrapped special-character form ({\'a}) so classic BibTeX sorts and
% labels them as single letters.
@article{10_14736_kyb_2017_4_0694,
  author   = {Luque-V{\'a}squez, Fernando and Minj{\'a}rez-Sosa, J. Adolfo},
  title    = {Empirical approximation in {Markov} games under unbounded payoff: discounted and average criteria},
  journal  = {Kybernetika},
  pages    = {694--716},
  year     = {2017},
  volume   = {53},
  number   = {4},
  doi      = {10.14736/kyb-2017-4-0694},
  mrnumber = {3730259},
  zbl      = {06819631},
  language = {en},
  url      = {http://geodesic.mathdoc.fr/articles/10.14736/kyb-2017-4-0694/},
}
TY  - JOUR
AU  - Luque-Vásquez, Fernando
AU  - Minjárez-Sosa, J. Adolfo
TI  - Empirical approximation in Markov games under unbounded payoff: discounted and average criteria
JO  - Kybernetika
PY  - 2017
SP  - 694
EP  - 716
VL  - 53
IS  - 4
UR  - http://geodesic.mathdoc.fr/articles/10.14736/kyb-2017-4-0694/
DO  - 10.14736/kyb-2017-4-0694
LA  - en
ID  - 10_14736_kyb_2017_4_0694
ER  -
%0 Journal Article
%A Luque-Vásquez, Fernando
%A Minjárez-Sosa, J. Adolfo
%T Empirical approximation in Markov games under unbounded payoff: discounted and average criteria
%J Kybernetika
%D 2017
%P 694-716
%V 53
%N 4
%U http://geodesic.mathdoc.fr/articles/10.14736/kyb-2017-4-0694/
%R 10.14736/kyb-2017-4-0694
%G en
%F 10_14736_kyb_2017_4_0694
Luque-Vásquez, Fernando; Minjárez-Sosa, J. Adolfo. Empirical approximation in Markov games under unbounded payoff: discounted and average criteria. Kybernetika, Tome 53 (2017) no. 4, pp. 694-716. doi: 10.14736/kyb-2017-4-0694
[1] Chang, H. S.: Perfect information two-person zero-sum Markov games with imprecise transition probabilities. Math. Meth. Oper. Res. 64 (2006), 335-351. | DOI | MR
[2] Dudley, R. M.: The speed of mean Glivenko-Cantelli convergence. Ann. Math. Stat. 40 (1969), 40-50. | DOI | MR
[3] Dynkin, E. B., Yushkevich, A. A.: Controlled Markov Processes. Springer-Verlag, New York 1979. | DOI | MR
[4] Fernández-Gaucherand, E.: A note on the Ross-Taylor Theorem. Appl. Math. Comp. 64 (1994), 207-212. | DOI | MR
[5] Filar, J., Vrieze, K.: Competitive Markov Decision Processes. Springer-Verlag, New York 1997. | DOI | MR
[6] Ghosh, M. K., McDonald, D., Sinha, S.: Zero-sum stochastic games with partial information. J. Optim. Theory Appl. 121 (2004), 99-118. | DOI | MR
[7] Gordienko, E. I.: Adaptive strategies for certain classes of controlled Markov processes. Theory Probab. Appl. 29 (1985), 504-518. | DOI | MR
[8] Gordienko, E. I., Hernández-Lerma, O.: Average cost Markov control processes with weighted norms: existence of canonical policies. Appl. Math. 23 (1995), 199-218. | MR | Zbl
[9] Gordienko, E. I., Hernández-Lerma, O.: Average cost Markov control processes with weighted norms: value iteration. Appl. Math. 23 (1995), 219-237. | MR
[10] Hernández-Lerma, O., Lasserre, J. B.: Discrete-Time Markov Control Processes: Basic Optimality Criteria. Springer-Verlag, New York 1996. | DOI | MR | Zbl
[11] Hilgert, N., Minjárez-Sosa, J. A.: Adaptive control of stochastic systems with unknown disturbance distribution: discounted criterion. Math. Meth. Oper. Res. 63 (2006), 443-460. | DOI | MR
[12] Jaśkiewicz, A., Nowak, A.: Zero-sum ergodic stochastic games with Feller transition probabilities. SIAM J. Control Optim. 45 (2006), 773-789. | DOI | MR
[13] Jaśkiewicz, A., Nowak, A.: Approximation of noncooperative semi-Markov games. J. Optim. Theory Appl. 131 (2006), 115-134. | DOI | MR
[14] Krausz, A., Rieder, U.: Markov games with incomplete information. Math. Meth. Oper. Res. 46 (1997), 263-279. | DOI | MR
[15] Minjárez-Sosa, J. A.: Nonparametric adaptive control for discrete-time Markov processes with unbounded costs under average criterion. Appl. Math. (Warsaw) 26 (1999), 267-280. | DOI | MR
[16] Minjárez-Sosa, J. A., Vega-Amaya, O.: Asymptotically optimal strategies for adaptive zero-sum discounted Markov games. SIAM J. Control Optim. 48 (2009), 1405-1421. | DOI | MR
[17] Minjárez-Sosa, J. A., Vega-Amaya, O.: Optimal strategies for adaptive zero-sum average Markov games. J. Math. Analysis Appl. 402 (2013), 44-56. | DOI | MR
[18] Minjárez-Sosa, J. A., Luque-Vásquez, F.: Two person zero-sum semi-Markov games with unknown holding times distribution on one side: discounted payoff criterion. Appl. Math. Optim. 57 (2008), 289-305. | DOI | MR
[19] Neyman, A., Sorin, S.: Stochastic Games and Applications. Kluwer, 2003. | DOI | MR
[20] Prieto-Rumeau, T., Lorenzo, J. M.: Approximation of zero-sum continuous-time Markov games under the discounted payoff criterion. TOP 23 (2015), 799-836. | DOI | MR
[21] Shimkin, N., Shwartz, A.: Asymptotically efficient adaptive strategies in repeated games. Part I: Certainty equivalence strategies. Math. Oper. Res. 20 (1995), 743-767. | DOI | MR
[22] Shimkin, N., Shwartz, A.: Asymptotically efficient adaptive strategies in repeated games. Part II: Asymptotic optimality. Math. Oper. Res. 21 (1996), 487-512. | DOI | MR
[23] Schäl, M.: Conditions for optimality and for the limit of $n$-stage optimal policies to be optimal. Z. Wahrs. Verw. Geb. 32 (1975), 179-196. | DOI | MR
[24] Rao, R. Ranga: Relations between weak and uniform convergence of measures with applications. Ann. Math. Statist. 33 (1962), 659-680. | DOI | MR
[25] Nunen, J. A. E. E. Van, Wessels, J.: A note on dynamic programming with unbounded rewards. Manag. Sci. 24 (1978), 576-580. | DOI | MR
Cité par Sources :