Keywords: CUDA, Power8, KNL, Graph500, parallel graph processing.
@article{VYURV_2018_7_2_a0,
author = {A. S. Kolganov},
title = {The fastest and energy-efficient breadth-first search algorithm on a single node with various parallel architectures according to {Graph500}},
journal = {Vestnik \^U\v{z}no-Uralʹskogo gosudarstvennogo universiteta. Seri\^a Vy\v{c}islitelʹna\^a matematika i informatika},
pages = {5--21},
year = {2018},
volume = {7},
number = {2},
language = {ru},
url = {http://geodesic.mathdoc.fr/item/VYURV_2018_7_2_a0/}
}
TY - JOUR AU - A. S. Kolganov TI - The fastest and energy-efficient breadth-first search algorithm on a single node with various parallel architectures according to Graph500 JO - Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ Vyčislitelʹnaâ matematika i informatika PY - 2018 SP - 5 EP - 21 VL - 7 IS - 2 UR - http://geodesic.mathdoc.fr/item/VYURV_2018_7_2_a0/ LA - ru ID - VYURV_2018_7_2_a0 ER -
%0 Journal Article %A A. S. Kolganov %T The fastest and energy-efficient breadth-first search algorithm on a single node with various parallel architectures according to Graph500 %J Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ Vyčislitelʹnaâ matematika i informatika %D 2018 %P 5-21 %V 7 %N 2 %U http://geodesic.mathdoc.fr/item/VYURV_2018_7_2_a0/ %G ru %F VYURV_2018_7_2_a0
A. S. Kolganov. The fastest and energy-efficient breadth-first search algorithm on a single node with various parallel architectures according to Graph500. Vestnik Ûžno-Uralʹskogo gosudarstvennogo universiteta. Seriâ Vyčislitelʹnaâ matematika i informatika, Tome 7 (2018) no. 2, pp. 5-21. http://geodesic.mathdoc.fr/item/VYURV_2018_7_2_a0/
[1] B. V. Cherkassky, A. V. Goldberg, T. Radzik, “Shortest Paths Algorithms: Theory and Experimental Evaluation”, Math. Program, 73 (1996), 129–174 | DOI
[2] E. F. Moore, “The Shortest Path through a Maze”, Proceedings of the International Symposium on the Theory of Switching (2–5 April 1957), Harvard University Press, 1959, 285–292
[3] B. A. Chazelle, “Minimum Spanning Tree Algorithm with Inverse-ackermann Type Complexity”, Journal of the ACM, 47:6 (2000), 1028–1047 | DOI
[4] A. S. Kolganov, “Parallel Implementation of Minimum Spanning Tree Algorithm on CPU and GPU”, Bulletin of South Ural State University. Series: Computational Mathematics and Software Engineering, 5:3 (2016), 5–19 | DOI
[5] Graph500, } {\tt http://graph500.org/
[6] GreenGraph500 } {\tt http://green.graph500.org/
[7] T. Cormen, C. Leiserson, R. Rivest, Introduction to Algorithms, MIT Press, Cambridge, 1990
[8] J. Edmonds, R. M. Karp, “Theoretical Improvements in Algorithmic Efficiency for Network Flow Problems”, Journal of the ACM, 19:2 (1972), 248–264 | DOI
[9] U. Brandes, “A Faster Algorithm for Betweenness Centrality”, J. Math. Sociol, 25:2 (2001), 163–177 | DOI
[10] M. Frasca, K. Madduri, P. Raghavan, “NUMA-Aware Graph Mining Techniques for Performance and Energy Efficiency”, Proc. ACM/IEEE Int. Conf. High Performance Computing, Networking, Storage and Analysis (Salt Lake City, Utah, USA, November 10–16, 2012), 2012, 1–11 | DOI
[11] M. Girvan, M.E. Newman, “Community Structure in Social and Biological Networks”, Proceedings of the National Academy of Sciences of the United States of America (USA, June 11, 2002), v. 99, no. 12, 2002, 7821–7826 | DOI
[12] E. W. Dijkstra, “A Note on Two Problems in Connexion with Graphs”, Numerische Mathematik, 1:1 (1959), 269–271 | DOI
[13] Top500, } {\tt https://www.top500.org/
[14] D. A. Bader, K. Madduri, “Designing Multithreaded Algorithms for Breadth-first Search and St-connectivity on the Cray MTA-2”, International Conference on Parallel Processing, ICPP'06, 2006, 523–530 | DOI
[15] R. E. Korf, P. Schultze, “Large-scale Parallel Breadth-first Search”, AAAI, 2005, 1380–1385
[16] A. Yoo, E. Chow, K. Henderson, W. McLendon W., Hendrickson B., Catalyurek U., “A Scalable Distributed Parallel Breadth-first Search Algorithm on BlueGene/L”, Proceedings of the 2005 ACM/IEEE Conference on Supercomputing (Seattle, Washington, USA, November 12–18, 2005), 2005 | DOI
[17] Y. Zhang, E. A. Hansen, “Parallel Breadth-first Heuristic Search on a Shared-memor Architecture”, AAAI Workshop on Heuristic Search, Memory-Based Heuristics and Their Applications, 2006
[18] Y. Yasui, K. Fujisawa, Y. Sato, “Fast and Energy-efficient Breadth-First Search on a Single NUMA System”, Lecture Notes in Computer Science. Vol, 8488, 365–381 | DOI
[19] T. Hiragushi, D. Takahashi, “Efficient Hybrid Breadth-First Search on GPUs”, Lecture Notes in Computer Science. Vol, 8286, 40–50 | DOI
[20] D. Merrill, M. Garland, A. Grimshaw, “Scalable GPU Graph Traversal”, Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (New Orleans, Louisiana, USA, February 25–29, 2012), 2012, 117–128 | DOI
[21] D. Chakrabarti, Y. Zhan, C. Faloutsos, “R-MAT: A Recursive Model for Graph Mining”, Proceedings of the 2004 SIAM International Conference on Data Mining (Florida, USA, April 22–24, 2004), 2004, 442–446 | DOI
[22] S. Pissanetzky, Sparse Matrix Technology, Academic Press, 1984
[23] CUDA Dynamic Parallelism, } {\tt https://devblogs.nvidia.com/parallelforall/...
[24] Intel Xeon Phi 7250, } {\tt https://ark.intel.com/ru/products/94035/Intel-Xeon-Phi...
[25] Intel Xeon E5 2699 v3, } {\tt https://ark.intel.com/ru/products/81061/Intel-Xeon-Processor...
[26] IBM Power8 and NVidia Tesla P100, } {\tt https://www-03.ibm.com/systems/ru/power/hardware/s822lc-hpc/
[27] Nvidia NVLink, } {\tt http://www.nvidia.com/object/nvlink.html