See the article record from the source Library of Science
@article{IJAMCS_2019_29_2_a10,
    author = {Zhao, Dongfang and Liu, Jiafeng and Wu, Rui and Cheng, Dansong and Tang, Xianglong},
    title = {An active exploration method for data efficient reinforcement learning},
    journal = {International Journal of Applied Mathematics and Computer Science},
    pages = {351--362},
    publisher = {mathdoc},
    volume = {29},
    number = {2},
    year = {2019},
    language = {en},
    url = {http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_2_a10/}
}
TY - JOUR
AU - Zhao, Dongfang
AU - Liu, Jiafeng
AU - Wu, Rui
AU - Cheng, Dansong
AU - Tang, Xianglong
TI - An active exploration method for data efficient reinforcement learning
JO - International Journal of Applied Mathematics and Computer Science
PY - 2019
SP - 351
EP - 362
VL - 29
IS - 2
PB - mathdoc
UR - http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_2_a10/
LA - en
ID - IJAMCS_2019_29_2_a10
ER -
%0 Journal Article
%A Zhao, Dongfang
%A Liu, Jiafeng
%A Wu, Rui
%A Cheng, Dansong
%A Tang, Xianglong
%T An active exploration method for data efficient reinforcement learning
%J International Journal of Applied Mathematics and Computer Science
%D 2019
%P 351-362
%V 29
%N 2
%I mathdoc
%U http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_2_a10/
%G en
%F IJAMCS_2019_29_2_a10
Zhao, Dongfang; Liu, Jiafeng; Wu, Rui; Cheng, Dansong; Tang, Xianglong. An active exploration method for data efficient reinforcement learning. International Journal of Applied Mathematics and Computer Science, Volume 29 (2019) no. 2, pp. 351-362. http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_2_a10/