@comment{
  Journal article record for VYURV_2016_5_3_a6.
  - Authors are given in "Last, First" form so name parsing is unambiguous.
  - Each accent command in the romanized journal title is wrapped in its own
    brace group so BibTeX treats it as a single letter when sorting/labelling.
  - The prime-like mark in "Uralʹskogo" / "Vyčislitelʹnaâ" is kept as the
    literal character from the original record; NOTE(review): a classic 8-bit
    BibTeX toolchain may need an ASCII/macro substitute here -- confirm target.
}
@article{VYURV_2016_5_3_a6,
  author   = {Likhoded, N. A. and Paliashchuk, M. A.},
  title    = {Estimate of locality of parallel algorithms implemented on {GPUs}},
  journal  = {Vestnik {\^U}{\v{z}}no-Uralʹskogo gosudarstvennogo universiteta. Seri{\^a} Vy{\v{c}}islitelʹna{\^a} matematika i informatika},
  year     = {2016},
  volume   = {5},
  number   = {3},
  pages    = {96--111},
  language = {ru},
  url      = {http://geodesic.mathdoc.fr/item/VYURV_2016_5_3_a6/},
}
TY  - JOUR
AU  - N. A. Likhoded
AU  - M. A. Paliashchuk
TI  - Estimate of locality of parallel algorithms implemented on GPUs
JO  - Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ Vyčislitelʹnaâ matematika i informatika
PY  - 2016
SP  - 96
EP  - 111
VL  - 5
IS  - 3
UR  - http://geodesic.mathdoc.fr/item/VYURV_2016_5_3_a6/
LA  - ru
ID  - VYURV_2016_5_3_a6
ER  -
%0 Journal Article
%A N. A. Likhoded
%A M. A. Paliashchuk
%T Estimate of locality of parallel algorithms implemented on GPUs
%J Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ Vyčislitelʹnaâ matematika i informatika
%D 2016
%P 96-111
%V 5
%N 3
%U http://geodesic.mathdoc.fr/item/VYURV_2016_5_3_a6/
%G ru
%F VYURV_2016_5_3_a6
N. A. Likhoded; M. A. Paliashchuk. Estimate of locality of parallel algorithms implemented on GPUs. Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ Vyčislitelʹnaâ matematika i informatika, Tome 5 (2016) no. 3, pp. 96-111. http://geodesic.mathdoc.fr/item/VYURV_2016_5_3_a6/
[1] N.A. Likhoded, M.A. Poleshchuk, “Method of Ranking Tiles Size Parameters of Parallel Algorithm”, Proceedings of the National Academy of Sciences of Belarus, 59:4 (2015), 25–33
[2] M. Kandemir, J. Ramanujam, M. Irwin, V. Narayana, I. Kadayif, A. Parikh, “A Compiler Based Approach for Dynamically Managing Scratch-Pad Memories in Embedded Systems”, IEEE Transactions on Computer-Aided Design, 23:2 (2004), 243–260 | DOI
[3] M. Baskaran, U. Bondhugula, S. Krishnamoorthy, J. Ramanujam, A. Rountev, P. Sadayappan, “Automatic Data Movement and Computation Mapping for Multi-Level Parallel Architectures with Explicitly Managed Memories”, Proceedings of the 13th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (Salt Lake City, USA, February 20–23), 2008, 1–10 | DOI
[4] Vl.V. Voevodin, Vad.V. Voevodin, “The Fortunate Locality of Supercomputers”, Open Systems, 2013, no. 9, 12–15
[5] Vl.V. Voevodin, Vad.V. Voevodin, Parallel Computing, BKhV-Peterburg, Sankt-Peterburg, 2002, 608 pp.
[6] J. Xue, W. Cai, “Time-Minimal Tiling when Rise is Larger than Zero”, Parallel Computing, 28:5 (2002), 915–939 | DOI
[7] M. Baskaran, J. Ramanujam, P. Sadayappan, “Automatic C-to-CUDA Code Generation for Affine Programs”, Proceedings of the Compiler Construction, 19th International Conference. Part of the Joint European Conferences on Theory and Practice of Software (Paphos, Cyprus, March 20–28), 2010, 244–263 | DOI
[8] U. Bondhugula, M. Baskaran, S. Krishnamoorthy, J. Ramanujam, A. Rountev, P. Sadayappan, “Automatic Transformations for Communication-Minimized Parallelization and Locality Optimization in the Polyhedral Model”, Lecture Notes in Computer Science, 2008, no. 4959, 132–146 | DOI
[9] G. Venkataraman, S. Sahni, S. Mukhopadhyaya, “A Blocked All-Pairs Shortest-Paths Algorithm”, J. Exp. Algorithmics, 2003, no. 8, 2.2 | DOI
[10] G.J. Katz, J. Kider, “All-Pairs Shortest-Paths for Large Graphs on the GPU”, Proceedings of the 23rd ACM SIGGRAPH/EUROGRAPHICS Symposium on Graphics Hardware (Sarajevo, Bosnia and Herzegovina), Eurographics Association, 2008, 47–55
[11] B.D. Lund, J.W. Smith, “A Multi-Stage CUDA Kernel for Floyd-Warshall”, CoRR abs/1001.4108, 2010