@article{IJAMCS_2020_30_2_a12,
    author = {D{\'\i}az, Henry and Sala, Antonio and Armesto, Leopoldo},
    title = {A linear programming methodology for approximate dynamic programming},
    journal = {International Journal of Applied Mathematics and Computer Science},
    pages = {363--375},
    publisher = {mathdoc},
    volume = {30},
    number = {2},
    year = {2020},
    language = {en},
    url = {http://geodesic.mathdoc.fr/item/IJAMCS_2020_30_2_a12/}
}
TY  - JOUR
AU  - Díaz, Henry
AU  - Sala, Antonio
AU  - Armesto, Leopoldo
TI  - A linear programming methodology for approximate dynamic programming
JO  - International Journal of Applied Mathematics and Computer Science
PY  - 2020
SP  - 363
EP  - 375
VL  - 30
IS  - 2
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/IJAMCS_2020_30_2_a12/
LA  - en
ID  - IJAMCS_2020_30_2_a12
ER  -
%0 Journal Article
%A Díaz, Henry
%A Sala, Antonio
%A Armesto, Leopoldo
%T A linear programming methodology for approximate dynamic programming
%J International Journal of Applied Mathematics and Computer Science
%D 2020
%P 363-375
%V 30
%N 2
%I mathdoc
%U http://geodesic.mathdoc.fr/item/IJAMCS_2020_30_2_a12/
%G en
%F IJAMCS_2020_30_2_a12
Díaz, Henry; Sala, Antonio; Armesto, Leopoldo. A linear programming methodology for approximate dynamic programming. International Journal of Applied Mathematics and Computer Science, Vol. 30 (2020) no. 2, pp. 363-375. http://geodesic.mathdoc.fr/item/IJAMCS_2020_30_2_a12/
[1] Allgöwer, F. and Zheng, A. (2012). Nonlinear Model Predictive Control, Springer, New York, NY.
[2] Ariño, C., Querol, A. and Sala, A. (2017). Shape-independent model predictive control for Takagi–Sugeno fuzzy systems, Engineering Applications of Artificial Intelligence 65(1): 493–505.
[3] Armesto, L., Girbés, V., Sala, A., Zima, M. and Šmídl, V. (2015). Duality-based nonlinear quadratic control: Application to mobile robot trajectory-following, IEEE Transactions on Control Systems Technology 23(4): 1494–1504.
[4] Armesto, L., Moura, J., Ivan, V., Erden, M.S., Sala, A. and Vijayakumar, S. (2018). Constraint-aware learning of policies by demonstration, International Journal of Robotics Research 37(13–14): 1673–1689.
[5] Bertsekas, D.P. (2017). Dynamic Programming and Optimal Control, Vol. 1, 4th Edn, Athena Scientific, Belmont, MA.
[6] Bertsekas, D.P. (2019). Reinforcement Learning and Optimal Control, Athena Scientific, Belmont, MA.
[7] Busoniu, L., Babuska, R., De Schutter, B. and Ernst, D. (2010). Reinforcement Learning and Dynamic Programming Using Function Approximators, CRC Press, Boca Raton, FL.
[8] Cervellera, C., Wen, A. and Chen, V.C. (2007). Neural network and regression spline value function approximations for stochastic dynamic programming, Computers & Operations Research 34(1): 70–90.
[9] De Farias, D.P. and Van Roy, B. (2003). The linear programming approach to approximate dynamic programming, Operations Research 51(6): 850–865.
[10] Deisenroth, M.P., Neumann, G. and Peters, J. (2013). A survey on policy search for robotics, Foundations and Trends in Robotics 2(1–2): 1–142.
[11] Díaz, H., Armesto, L. and Sala, A. (2019). Metodología de programación dinámica aproximada para control óptimo basada en datos [Approximate dynamic programming methodology for data-based optimal control], Revista Iberoamericana de Automática e Informática Industrial 16(3): 273–283.
[12] Díaz, H., Armesto, L. and Sala, A. (2020). Fitted Q-function control methodology based on Takagi–Sugeno systems, IEEE Transactions on Control Systems Technology 28(2): 477–488.
[13] Lagoudakis, M.G. and Parr, R. (2003). Least-squares policy iteration, Journal of Machine Learning Research 4(Dec): 1107–1149.
[14] Lewis, F.L. and Liu, D. (2013). Reinforcement Learning and Approximate Dynamic Programming for Feedback Control, Wiley, Hoboken, NJ.
[15] Lewis, F.L. and Vrabie, D. (2009). Reinforcement learning and adaptive dynamic programming for feedback control, IEEE Circuits and Systems Magazine 9(3): 32–50.
[16] Lewis, F., Vrabie, D. and Syrmos, V. (2012). Optimal Control, 3rd Edn, John Wiley & Sons, Hoboken, NJ.
[17] Liu, D., Wei, Q., Wang, D., Yang, X. and Li, H. (2017). Adaptive Dynamic Programming with Applications in Optimal Control, Springer, Berlin.
[18] Manne, A.S. (1960). Linear programming and sequential decisions, Management Science 6(3): 259–267.
[19] Marsh, L.C. and Cormier, D.R. (2001). Spline Regression Models, Number 137, Sage, Thousand Oaks, CA.
[20] Munos, R., Baird, L.C. and Moore, A.W. (1999). Gradient descent approaches to neural-net-based solutions of the Hamilton–Jacobi–Bellman equation, International Joint Conference on Neural Networks, Washington, DC, USA, Vol. 3, pp. 2152–2157.
[21] Munos, R. and Szepesvári, C. (2008). Finite-time bounds for fitted value iteration, Journal of Machine Learning Research 9(May): 815–857.
[22] Powell, W.B. (2011). Approximate Dynamic Programming: Solving the Curses of Dimensionality, 2nd Edn, Wiley, Hoboken, NJ.
[23] Preitl, S., Precup, R.-E., Preitl, Z., Vaivoda, S., Kilyeni, S. and Tar, J.K. (2007). Iterative feedback and learning control. Servo systems applications, IFAC Proceedings Volumes 40(8): 16–27.
[24] Rantzer, A. (2006). Relaxed dynamic programming in switching systems, IEE Proceedings: Control Theory and Applications 153(5): 567–574.
[25] Robles, R., Sala, A. and Bernal, M. (2019). Performance-oriented quasi-LPV modeling of nonlinear systems, International Journal of Robust and Nonlinear Control 29(5): 1230–1248.
[26] Sutton, R.S. and Barto, A.G. (2018). Reinforcement Learning: An Introduction, 2nd Edn, MIT Press, Cambridge, MA.
[27] Tan, K., Zhao, S. and Xu, J. (2007). Online automatic tuning of a proportional integral derivative controller based on an iterative learning control approach, IET Control Theory & Applications 1(1): 90–96.
[28] Zajdel, R. (2013). Epoch-incremental reinforcement learning algorithms, International Journal of Applied Mathematics and Computer Science 23(3): 623–635, DOI: 10.2478/amcs-2013-0047.
[29] Zhao, D., Liu, J., Wu, R., Cheng, D. and Tang, X. (2019). An active exploration method for data efficient reinforcement learning, International Journal of Applied Mathematics and Computer Science 29(2): 351–362, DOI: 10.2478/amcs-2019-0026.