See the article record from the Library of Science source
@article{IJAMCS_2023_33_1_a8,
  author = {Lazebnik, Teddy and Rosenfeld, Avi},
  title = {FSPL: {A} meta-learning approach for a filter and embedded feature selection pipeline},
  journal = {International Journal of Applied Mathematics and Computer Science},
  pages = {103--115},
  publisher = {mathdoc},
  volume = {33},
  number = {1},
  year = {2023},
  language = {en},
  url = {http://geodesic.mathdoc.fr/item/IJAMCS_2023_33_1_a8/}
}
TY  - JOUR
AU  - Lazebnik, Teddy
AU  - Rosenfeld, Avi
TI  - FSPL: A meta-learning approach for a filter and embedded feature selection pipeline
JO  - International Journal of Applied Mathematics and Computer Science
PY  - 2023
SP  - 103
EP  - 115
VL  - 33
IS  - 1
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/IJAMCS_2023_33_1_a8/
LA  - en
ID  - IJAMCS_2023_33_1_a8
ER  -
%0 Journal Article
%A Lazebnik, Teddy
%A Rosenfeld, Avi
%T FSPL: A meta-learning approach for a filter and embedded feature selection pipeline
%J International Journal of Applied Mathematics and Computer Science
%D 2023
%P 103-115
%V 33
%N 1
%I mathdoc
%U http://geodesic.mathdoc.fr/item/IJAMCS_2023_33_1_a8/
%G en
%F IJAMCS_2023_33_1_a8
Lazebnik, Teddy; Rosenfeld, Avi. FSPL: A meta-learning approach for a filter and embedded feature selection pipeline. International Journal of Applied Mathematics and Computer Science, Volume 33 (2023) no. 1, pp. 103-115. http://geodesic.mathdoc.fr/item/IJAMCS_2023_33_1_a8/