See the article record from the Library of Science source
@article{IJAMCS_2017_27_4_a5,
    author = {Weinberg, A. I. and Last, M.},
    title = {Interpretable decision-tree induction in a big data parallel framework},
    journal = {International Journal of Applied Mathematics and Computer Science},
    pages = {737--748},
    publisher = {mathdoc},
    volume = {27},
    number = {4},
    year = {2017},
    language = {en},
    url = {http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a5/}
}
TY  - JOUR
AU  - Weinberg, A. I.
AU  - Last, M.
TI  - Interpretable decision-tree induction in a big data parallel framework
JO  - International Journal of Applied Mathematics and Computer Science
PY  - 2017
SP  - 737
EP  - 748
VL  - 27
IS  - 4
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a5/
LA  - en
ID  - IJAMCS_2017_27_4_a5
ER  -
%0 Journal Article
%A Weinberg, A. I.
%A Last, M.
%T Interpretable decision-tree induction in a big data parallel framework
%J International Journal of Applied Mathematics and Computer Science
%D 2017
%P 737-748
%V 27
%N 4
%I mathdoc
%U http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a5/
%G en
%F IJAMCS_2017_27_4_a5
Weinberg, A. I.; Last, M. Interpretable decision-tree induction in a big data parallel framework. International Journal of Applied Mathematics and Computer Science, Vol. 27 (2017) no. 4, pp. 737-748. http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a5/