View the article record from the source Library of Science
@article{IJAMCS_2020_30_1_a1,
  author = {Bania, Piotr},
  title = {An information based approach to stochastic control problems},
  journal = {International Journal of Applied Mathematics and Computer Science},
  pages = {23--34},
  publisher = {mathdoc},
  volume = {30},
  number = {1},
  year = {2020},
  language = {en},
  url = {http://geodesic.mathdoc.fr/item/IJAMCS_2020_30_1_a1/}
}
TY - JOUR
AU - Bania, Piotr
TI - An information based approach to stochastic control problems
JO - International Journal of Applied Mathematics and Computer Science
PY - 2020
SP - 23
EP - 34
VL - 30
IS - 1
PB - mathdoc
UR - http://geodesic.mathdoc.fr/item/IJAMCS_2020_30_1_a1/
LA - en
ID - IJAMCS_2020_30_1_a1
ER -
Bania, Piotr. An information based approach to stochastic control problems. International Journal of Applied Mathematics and Computer Science, Volume 30 (2020) no. 1, pp. 23-34. http://geodesic.mathdoc.fr/item/IJAMCS_2020_30_1_a1/
[1] Alpcan, T., Shames, I., Cantoni, M. and Nair, G. (2015). An information-based learning approach to dual control, IEEE Transactions on Neural Networks and Learning Systems 26(11): 2736–2748.
[2] Alspach, D. and Sorenson, H. (1972). Nonlinear Bayesian estimation using Gaussian sum approximations, IEEE Transactions on Automatic Control 17(4): 439–448.
[3] Åström, K. and Wittenmark, B. (1995). Adaptive Control, Second Edition, Dover Publications, New York, NY.
[4] Banek, T. (2010). Incremental value of information for discrete-time partially observed stochastic systems, Control and Cybernetics 39(3): 769–781.
[5] Bania, P. (2017). Simple example of dual control problem with almost analytical solution, Proceedings of the 19th Polish Control Conference, Kraków, Poland, pp. 55–64, DOI: 10.1007/978-3-319-60699-6_7.
[6] Bania, P. (2018). Example for equivalence of dual and information based optimal control, International Journal of Control 38(5): 787–803, DOI: 10.1080/00207179.2018.1436775.
[7] Bania, P. (2019). Bayesian input design for linear dynamical model discrimination, Entropy 21(4): 1–13, DOI: 10.3390/e21040351.
[8] Bania, P. and Baranowski, J. (2016). Field Kalman filter and its approximation, 55th IEEE Conference on Decision and Control, Las Vegas, NV, USA, pp. 2875–2880, DOI: 10.1109/CDC.2016.7798697.
[9] Bania, P. and Baranowski, J. (2017). Bayesian estimator of a faulty state: Logarithmic odds approach, 22nd International Conference on Methods and Models in Automation and Robotics (MMAR), Międzyzdroje, Poland, pp. 253–257, DOI: 10.1109/MMAR.2017.8046834.
[10] Baranowski, J., Bania, P., Prasad, I. and Cong, T. (2017). Bayesian fault detection and isolation using field Kalman filter, EURASIP Journal on Advances in Signal Processing 79(1), DOI: 10.1186/s13634-017-0514-8.
[11] Bar-Shalom, Y. and Tse, E. (1976). Caution, probing, and the value of information in the control of uncertain systems, Annals of Economic and Social Measurement 5(3): 323–337.
[12] Brechtel, S., Gindele, T. and Dillmann, R. (2013). Solving continuous POMDPs: Value iteration with incremental learning of an efficient space representation, Proceedings of the 30th International Conference on Machine Learning, ICML'13, Atlanta, GA, USA, Vol. 28, pp. III-370–III-378.
[13] Byrd, R., Hansen, S., Nocedal, J. and Singer, Y. (2016). A stochastic quasi-Newton method for large-scale optimization, SIAM Journal on Optimization 26(2): 1008–1031.
[14] Cover, T.M. and Thomas, J.A. (2006). Elements of Information Theory, Second Edition, John Wiley & Sons, Inc., Hoboken, NJ.
[15] Delvenne, J.C. and Sandberg, H. (2013). Towards a thermodynamics of control: Entropy, energy and Kalman filtering, 52nd IEEE Conference on Decision and Control, Florence, Italy, pp. 3109–3114.
[16] Dolgov, M. (2017). Approximate Stochastic Optimal Control of Smooth Nonlinear Systems and Piecewise Linear Systems, PhD thesis, Karlsruhe Institute of Technology, Karlsruhe.
[17] Feldbaum, A.A. (1965). Optimal Control Systems, Academic Press, New York, NY.
[18] Filatov, N.M. and Unbehauen, H. (2004). Adaptive Dual Control: Theory and Applications, Springer-Verlag, Berlin/Heidelberg.
[19] Hijab, O. (1984). Entropy and dual control, 23rd Conference on Decision and Control, Las Vegas, NV, USA, pp. 45–50.
[20] Huang, C., Ho, D.W.C., Lu, J. and Kurths, J. (2012). Partial synchronization in stochastic dynamical networks with switching communication channels, Chaos: An Interdisciplinary Journal of Nonlinear Science 22(2): 023108, DOI: 10.1063/1.3702576.
[21] Jiang, H. (2017). Uniform convergence rates for kernel density estimation, Proceedings of the 34th International Conference on Machine Learning, Sydney, Australia, pp. 1694–1703.
[22] Joe, H. (1989). Estimation of entropy and other functionals of a multivariate density, Annals of the Institute of Statistical Mathematics 41(4): 683–697.
[23] Kolchinsky, A. and Tracey, B.D. (2017). Estimating mixture entropy with pairwise distances, Entropy 19(361): 1–17.
[24] Korbicz, J., Kościelny, J.M., Kowalczuk, Z. and Cholewa, W. (2004). Fault Diagnosis: Models, Artificial Intelligence, Applications, Springer-Verlag, Berlin/Heidelberg.
[25] Kozłowski, E. and Banek, T. (2011). Active learning in discrete time stochastic systems, in J. Józefczyk and D. Orski (Eds), Knowledge-Based Intelligent System Advancements: Systemic and Cybernetic Approaches, Information Science Reference, New York, NY, pp. 350–371.
[26] Mitter, S.K. and Newton, N.J. (2005). Information and entropy flow in the Kalman–Bucy filter, Journal of Statistical Physics 118(1): 145–176.
[27] Porta, J.M., Vlassis, N., Spaan, M.T. and Poupart, P. (2006). Point-based value iteration for continuous POMDPs, Journal of Machine Learning Research 7(1): 2329–2367.
[28] Sagawa, T. and Ueda, M. (2013). Role of mutual information in entropy production under information exchanges, New Journal of Physics 15(125012): 2–23.
[29] Saridis, G.N. (1988). Entropy formulation of optimal and adaptive control, IEEE Transactions on Automatic Control 33(8): 713–721.
[30] Särkkä, S. (2013). Bayesian Filtering and Smoothing, Cambridge University Press, New York, NY.
[31] Tatikonda, S. and Mitter, S.K. (2004). Control under communication constraints, IEEE Transactions on Automatic Control 49(7): 1056–1068.
[32] Thrun, S. (2000). Monte Carlo POMDPs, in S. Solla et al. (Eds), Advances in Neural Information Processing Systems, MIT Press, Cambridge, MA, pp. 1064–1070.
[33] Touchette, H. (2000). Information-theoretic Aspects in the Control of Dynamical Systems, Master's thesis, MIT, Cambridge, MA, https://pdfs.semanticscholar.org/c915/088f514d937f5d1c666221c95d731532101e.pdf.
[34] Touchette, H. and Lloyd, S. (2000). Information-theoretic limits of control, Physical Review Letters 84(6): 1156–1159.
[35] Touchette, H. and Lloyd, S. (2004). Information-theoretic approach to the study of control systems, Physica A 331(1): 140–172.
[36] Tsai, Y.A., Casiello, F.A. and Loparo, K.A. (1992). Discrete-time entropy formulation of optimal and adaptive control problems, IEEE Transactions on Automatic Control 37(7): 1083–1088.
[37] Tse, E. (1974). Adaptive dual control methods, Annals of Economic and Social Measurement 3(1): 65–82.
[38] Uciński, D. (2004). Optimal Measurement Methods for Distributed Parameter System Identification, CRC Press, Boca Raton, FL.
[39] Zabczyk, J. (1996). Chance and Decision: Stochastic Control in Discrete Time, Quaderni, Scuola Normale Superiore, Pisa.
[40] Zhao, D., Liu, J., Wu, R., Cheng, D. and Tang, X. (2019). An active exploration method for data efficient reinforcement learning, International Journal of Applied Mathematics and Computer Science 29(2): 351–362, DOI: 10.2478/amcs-2019-0026.