% Journal article record for a Russian-language paper (language = ru); the
% journal title is kept in its Latin transliteration. Accented letters are
% written as brace-wrapped BibTeX special characters and the soft-sign prime
% as {$'$}, so the field stays pure ASCII and sorts/labels correctly.
@article{VYURU_2010_6_a8,
  author   = {Polyakov, A. Y.},
  title    = {On program restoration from checkpoints set},
  journal  = {Vestnik {\^U}{\v{z}}no-Ural{$'$}skogo gosudarstvennogo universiteta. Seri{\^a}, Matemati{\v{c}}eskoe modelirovanie i programmirovanie},
  pages    = {91--103},
  year     = {2010},
  number   = {6},
  language = {ru},
  url      = {http://geodesic.mathdoc.fr/item/VYURU_2010_6_a8/},
}
TY  - JOUR
AU  - A. Y. Polyakov
TI  - On program restoration from checkpoints set
JO  - Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ, Matematičeskoe modelirovanie i programmirovanie
PY  - 2010
SP  - 91
EP  - 103
IS  - 6
UR  - http://geodesic.mathdoc.fr/item/VYURU_2010_6_a8/
LA  - ru
ID  - VYURU_2010_6_a8
ER  - 
A. Y. Polyakov. On program restoration from checkpoints set. Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ, Matematičeskoe modelirovanie i programmirovanie, no. 6 (2010), pp. 91–103. http://geodesic.mathdoc.fr/item/VYURU_2010_6_a8/
[1] V. G. Khoroshevskii, Arkhitektura vychislitelnykh sistem, MGTU im. N. E. Baumana, M., 2008, 520 pp.
[2] TOP500 supercomputer site, Zagl. s ekrana. (yaz. angl.) http://www.top500.org/
[3] E. N. Elnozahy, L. Alvisi, Y. M. Wang, D. B. Johnson, “A survey of rollback-recovery protocols in message-passing systems”, ACM Computing Surveys, 34:3 (2002), 375–408 | DOI
[4] J. Ansel, K. Arya, G. Cooperman, “DMTCP: Transparent Checkpointing for Cluster Computations and the Desktop”, Proc. of IEEE International Parallel and Distributed Processing Symposium, IPDPS'09 (Rome, 2009), 1–12 | Zbl
[5] P. H. Hargrove, J. C. Duell, “Berkeley Lab Checkpoint/Restart (BLCR) for Linux Clusters”, Proceedings of SCIENTIFIC DISCOVERY THROUGH ADVANCED COMPUTING (Denver, 2006), Journal of Physics: Conference Series, 46, 494–499 | DOI
[6] M. Litzkow, T. Tannenbaum, J. Basney, M. Livny, Checkpoint and migration of UNIX processes in the Condor distributed processing system, Technical report 1346, University of Wisconsin, Madison, Wisconsin, 1997, 8 pp.
[7] J. S. Plank, M. Beck, G. Kingsley, K. Li, “Libckpt: Transparent checkpointing under Unix”, Proc. of the USENIX Winter 1995 Technical Conference (New Orleans, 1995), 213–223
[8] J. Hursey, J. M. Squyres, T. I. Mattox, A. Lumsdaine, “The design and implementation of checkpoint/restart process fault tolerance for Open MPI”, Proceedings of the 21st IEEE International Parallel and Distributed Processing Symposium (IPDPS), IEEE Computer Society, Long Beach, 2007, 1–8
[9] Q. Gao, W. Yu, W. Huang, D. K. Panda, “Application-transparent checkpoint/restart for MPI programs over InfiniBand”, Proceedings of the 2006 International Conference on Parallel Processing, IEEE Computer Society, Washington, 2006, 471–478 | DOI
[10] D. Dewolfs, J. Broeckhove, V. Sunderam, G. Fagg, “FT-MPI, Fault-Tolerant Metacomputing and Generic Name Services: A Case Study”, Lecture Notes in Computer Science, Springer, Berlin-Heidelberg, 2006, 133–140 | DOI
[11] A. M. Visan, A. Polyakov, P. S. Solanki, K. Arya, T. Denniston, G. Cooperman, Temporal Debugging using URDB, 2009, arXiv: 0910.5046v1