Voir la notice de l'article provenant de la source Library of Science
% BibTeX record for the article published in IJAMCS, volume 29, number 1 (2019).
@article{IJAMCS_2019_29_1_a4,
  author    = {Ali, Syed Muhammad Fawad and Mey, Johannes and Thiele, Maik},
  title     = {Parallelizing user-defined functions in the {ETL} workflow using orchestration style sheets},
  journal   = {International Journal of Applied Mathematics and Computer Science},
  volume    = {29},
  number    = {1},
  pages     = {69--79},
  year      = {2019},
  publisher = {mathdoc},
  language  = {en},
  url       = {http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_1_a4/},
}
TY - JOUR AU - Ali, Syed Muhammad Fawad AU - Mey, Johannes AU - Thiele, Maik TI - Parallelizing user-defined functions in the ETL workflow using orchestration style sheets JO - International Journal of Applied Mathematics and Computer Science PY - 2019 SP - 69 EP - 79 VL - 29 IS - 1 PB - mathdoc UR - http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_1_a4/ LA - en ID - IJAMCS_2019_29_1_a4 ER -
%0 Journal Article %A Ali, Syed Muhammad Fawad %A Mey, Johannes %A Thiele, Maik %T Parallelizing user-defined functions in the ETL workflow using orchestration style sheets %J International Journal of Applied Mathematics and Computer Science %D 2019 %P 69-79 %V 29 %N 1 %I mathdoc %U http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_1_a4/ %G en %F IJAMCS_2019_29_1_a4
Ali, Syed Muhammad Fawad; Mey, Johannes; Thiele, Maik. Parallelizing user-defined functions in the ETL workflow using orchestration style sheets. International Journal of Applied Mathematics and Computer Science, Tome 29 (2019) no. 1, pp. 69-79. http://geodesic.mathdoc.fr/item/IJAMCS_2019_29_1_a4/
[1] Ali, S.M.F. (2018). Next-generation ETL framework to address the challenges posed by big data, Workshop Proceedings of the EDBT/ICDT Joint Conference, Vienna, Austria.
[2] Ali, S.M.F. and Wrembel, R. (2017). From conceptual design to performance optimization of ETL workflows: Current state of research and open problems, The VLDB Journal 26(6): 1–25.
[3] Aßmann, U. (2003). Invasive software composition, Invasive Software Composition, Springer, Berlin/Heidelberg, pp. 107–145.
[4] Battré, D., Ewen, S., Hueske, F., Kao, O., Markl, V. and Warneke, D. (2010). Nephele/PACTs: A programming model and execution framework for web-scale analytical processing, Proceedings of the Symposium on Cloud Computing, Indianapolis, IN, USA, pp. 119–130.
[5] Chaiken, R., Jenkins, B., Larson, P.-Å., Ramsey, B., Shakib, D., Weaver, S. and Zhou, J. (2008). Scope: Easy and efficient parallel processing of massive data sets, Proceedings of the VLDB Endowment 1(2): 1265–1276.
[6] Cloudera (2016). Example: Sentiment analysis using MapReduce custom counters, https://www.cloudera.com/documentation/other/tutorial/CDH5/topics/ht_example_4_sentiment_analysis.html.
[7] Dagum, L. and Menon, R. (1998). OpenMP: An industry standard API for shared-memory programming, IEEE Computational Science and Engineering 5(1): 46–55.
[8] Dean, J. and Ghemawat, S. (2008). MapReduce: Simplified data processing on large clusters, Communications of the ACM 51(1): 107–113.
[9] Ekman, T. and Hedin, G. (2007). The JastAdd system—modular extensible compiler construction, Science of Computer Programming 69(1–3): 14–26.
[10] Ghazal, A., Rabl, T., Hu, M., Raab, F., Poess, M., Crolotte, A. and Jacobsen, H.-A. (2013). BigBench: Towards an industry standard benchmark for big data analytics, Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data, New York, NY, USA, pp. 1197–1208.
[11] González-Vélez, H. and Kontagora, M. (2011). Performance evaluation of MapReduce using full virtualisation on a departmental cloud, International Journal of Applied Mathematics and Computer Science 21(2): 275–284, DOI: 10.2478/v10006-011-0020-3.
[12] Große, P., May, N. and Lehner, W. (2014). A study of partitioning and parallel UDF execution with the SAP HANA database, Proceedings of the 26th International Conference on Scientific and Statistical Database Management, Aalborg, Denmark, p. 36.
[13] Hedin, G. (2000). Reference attributed grammars, Informatica (Slovenia) 24(3): 301–317.
[14] Karagiannis, A., Vassiliadis, P. and Simitsis, A. (2013). Scheduling strategies for efficient ETL execution, Information Systems 38(6): 927–945.
[15] Karol, S. (2015). Well-formed and Scalable Invasive Software Composition, PhD dissertation, Technische Universität Dresden, Dresden.
[16] Kiczales, G., Lamping, J., Mendhekar, A., Maeda, C., Lopes, C., Loingtier, J.-M. and Irwin, J. (1997). Aspect-oriented programming, in M. Akşit and S. Matsuoka (Eds.), European Conference on Object-oriented Programming, Springer, Berlin/Heidelberg, pp. 220–242.
[17] Kumar, N. and Kumar, P.S. (2010). An efficient heuristic for logical optimization of ETL workflows, International Workshop on Business Intelligence for the Real-Time Enterprise, Singapore, Singapore, pp. 68–83.
[18] Liu, X., Thomsen, C. and Pedersen, T.B. (2013). ETLMR: A highly scalable dimensional ETL framework based on MapReduce, in A. Hameurlain et al. (Eds.), Transactions on Large-Scale Data- and Knowledge-Centered Systems VIII, Springer, Berlin/Heidelberg, pp. 1–31.
[19] Manning, C.D., Surdeanu, M., Bauer, J., Finkel, J., Bethard, S. and McClosky, D. (2014). The Stanford CoreNLP natural language processing toolkit, Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations, Baltimore, MD, USA, pp. 55–60.
[20] Mey, J., Karol, S., Aßmann, U., Huismann, I., Stiller, J. and Fröhlich, J. (2016). Using semantics-aware composition and weaving for multi-variant progressive parallelization, Procedia Computer Science 80: 1554–1565.
[21] Nambiar, R.O. and Poess, M. (2006). The making of TPC-DS, Proceedings of the 32nd International Conference on Very Large Data Bases, Seoul, Korea, pp. 1049–1058.
[22] Simitsis, A., Vassiliadis, P. and Sellis, T. (2005). State-space optimization of ETL workflows, IEEE Transactions on Knowledge and Data Engineering 17(10): 1404–1419.
[23] Simitsis, A., Wilkinson, K., Dayal, U. and Castellanos, M. (2010). Optimizing ETL workflows for fault-tolerance, IEEE 26th International Conference on Data Engineering (ICDE), Long Beach, CA, USA, pp. 385–396.
[24] Thomsen, C. and Pedersen, T.B. (2011). Easy and effective parallel programmable ETL, Proceedings of the ACM 14th International Workshop on Data Warehousing and OLAP, New York, NY, USA, pp. 37–44.
[25] Tziovara, V., Vassiliadis, P. and Simitsis, A. (2007). Deciding the physical implementation of ETL workflows, Proceedings of the International Workshop on Data Warehousing and OLAP, New York, NY, USA, pp. 49–56.
[26] Vassiliadis, P., Simitsis, A. and Baikousi, E. (2009). A taxonomy of ETL activities, Proceedings of the ACM 12th International Workshop on Data Warehousing and OLAP, New York, NY, USA, pp. 25–32.
[27] Weinberg, A.I. and Last, M. (2017). Interpretable decision-tree induction in a big data parallel framework, International Journal of Applied Mathematics and Computer Science 27(4): 737–748, DOI: 10.1515/amcs-2017-0051.