Voir la notice de l'article provenant de la source Library of Science
@article{IJAMCS_2017_27_4_a4,
  author    = {Koziarski, M. and Wo{\'z}niak, M.},
  title     = {{CCR}: {A} combined cleaning and resampling algorithm for imbalanced data classification},
  journal   = {International Journal of Applied Mathematics and Computer Science},
  pages     = {727--736},
  publisher = {mathdoc},
  volume    = {27},
  number    = {4},
  year      = {2017},
  language  = {en},
  url       = {http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a4/},
}
TY - JOUR AU - Koziarski, M. AU - Woźniak, M. TI - CCR: A combined cleaning and resampling algorithm for imbalanced data classification JO - International Journal of Applied Mathematics and Computer Science PY - 2017 SP - 727 EP - 736 VL - 27 IS - 4 PB - mathdoc UR - http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a4/ LA - en ID - IJAMCS_2017_27_4_a4 ER -
%0 Journal Article %A Koziarski, M. %A Woźniak, M. %T CCR: A combined cleaning and resampling algorithm for imbalanced data classification %J International Journal of Applied Mathematics and Computer Science %D 2017 %P 727-736 %V 27 %N 4 %I mathdoc %U http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a4/ %G en %F IJAMCS_2017_27_4_a4
Koziarski, M.; Woźniak, M. CCR: A combined cleaning and resampling algorithm for imbalanced data classification. International Journal of Applied Mathematics and Computer Science, Tome 27 (2017) no. 4, pp. 727-736. http://geodesic.mathdoc.fr/item/IJAMCS_2017_27_4_a4/
[1] Aggarwal, C.C., Hinneburg, A. and Keim, D.A. (2001). On the surprising behavior of distance metrics in high dimensional space, International Conference on Database Theory, London, UK, pp. 420–434.
[2] Alcalá, J., Fernández, A., Luengo, J., Derrac, J., García, S., Sánchez, L. and Herrera, F. (2010). KEEL data-mining software tool: Data set repository, integration of algorithms and experimental analysis framework, Journal of Multiple-Valued Logic and Soft Computing 17(2–3): 255–287.
[3] Barua, S., Islam, M.M., Yao, X. and Murase, K. (2014). MWMOTE—majority weighted minority oversampling technique for imbalanced data set learning, IEEE Transactions on Knowledge and Data Engineering 26(2): 405–425.
[4] Batista, G.E., Prati, R.C. and Monard, M.C. (2004). A study of the behavior of several methods for balancing machine learning training data, ACM SIGKDD Explorations Newsletter 6(1): 20–29.
[5] Bunkhumpornpat, C. and Sinapiromsaran, K. (2015). CORE: Core-based synthetic minority over-sampling and borderline majority under-sampling technique, International Journal of Data Mining and Bioinformatics 12(1): 44–58.
[6] Bunkhumpornpat, C., Sinapiromsaran, K. and Lursinsap, C. (2009). Safe-level-SMOTE: Safe-level-synthetic minority over-sampling technique for handling the class imbalanced problem, Pacific-Asia Conference on Knowledge Discovery and Data Mining, Bangkok, Thailand, pp. 475–482.
[7] Chawla, N.V., Bowyer, K.W., Hall, L.O. and Kegelmeyer, W.P. (2002). SMOTE: Synthetic minority over-sampling technique, Journal of Artificial Intelligence Research 16: 321–357.
[8] Chawla, N.V., Lazarevic, A., Hall, L.O. and Bowyer, K.W. (2003). SMOTEBoost: Improving prediction of the minority class in boosting, European Conference on Principles of Data Mining and Knowledge Discovery, Cavtat/Dubrovnik, Croatia, pp. 107–119.
[9] Dubey, R., Zhou, J., Wang, Y., Thompson, P.M. and Ye, J. (2014). Analysis of sampling techniques for imbalanced data: An n = 648 ADNI study, NeuroImage 87: 220–241.
[10] Estabrooks, A., Jo, T. and Japkowicz, N. (2004). A multiple resampling method for learning from imbalanced data sets, Computational Intelligence 20(1): 18–36.
[11] Fernández, A., López, V., Galar, M., Del Jesus, M.J. and Herrera, F. (2013). Analysing the classification of imbalanced data-sets with multiple classes: Binarization techniques and ad-hoc approaches, Knowledge-Based Systems 42: 97–110.
[12] Fernández-Navarro, F., Hervás-Martínez, C. and Gutiérrez, P.A. (2011). A dynamic over-sampling procedure based on sensitivity for multi-class problems, Pattern Recognition 44(8): 1821–1833.
[13] Galar, M., Fernandez, A., Barrenechea, E., Bustince, H. and Herrera, F. (2012). A review on ensembles for the class imbalance problem: Bagging-, boosting-, and hybrid-based approaches, IEEE Transactions on Systems, Man, and Cybernetics C: Applications and Reviews 42(4): 463–484.
[14] Galar, M., Fernández, A., Barrenechea, E. and Herrera, F. (2013). EUSBoost: Enhancing ensembles for highly imbalanced data-sets by evolutionary undersampling, Pattern Recognition 46(12): 3460–3471.
[15] García, S. and Herrera, F. (2009). Evolutionary undersampling for classification with imbalanced datasets: Proposals and taxonomy, Evolutionary Computation 17(3): 275–306.
[16] García, V., Sánchez, J. and Mollineda, R. (2007). An empirical study of the behavior of classifiers on imbalanced and overlapped data sets, Iberoamerican Congress on Pattern Recognition, Valparaiso, Chile, pp. 397–406.
[17] Han, H., Wang, W.-Y. and Mao, B.-H. (2005). Borderline-SMOTE: A new over-sampling method in imbalanced data sets learning, International Conference on Intelligent Computing, Hefei, China, pp. 878–887.
[18] Hao, M., Wang, Y. and Bryant, S.H. (2014). An efficient algorithm coupled with synthetic minority over-sampling technique to classify imbalanced PubChem BioAssay data, Analytica Chimica Acta 806: 117–127.
[19] He, H., Bai, Y., Garcia, E.A. and Li, S. (2008). ADASYN: Adaptive synthetic sampling approach for imbalanced learning, 2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence), Hong Kong, China, pp. 1322–1328.
[20] He, H. and Garcia, E.A. (2009). Learning from imbalanced data, IEEE Transactions on Knowledge and Data Engineering 21(9): 1263–1284.
[21] Hoens, T.R., Polikar, R. and Chawla, N.V. (2012). Learning from streaming data with concept drift and imbalance: An overview, Progress in Artificial Intelligence 1(1): 89–101.
[22] Jo, T. and Japkowicz, N. (2004). Class imbalances versus small disjuncts, ACM SIGKDD Explorations Newsletter 6(1): 40–49.
[23] Khreich, W., Granger, E., Miri, A. and Sabourin, R. (2010). Iterative Boolean combination of classifiers in the ROC space: An application to anomaly detection with HMMs, Pattern Recognition 43(8): 2732–2752.
[24] Krawczyk, B. (2016). Learning from imbalanced data: Open challenges and future directions, Progress in Artificial Intelligence 5(4): 221–232.
[25] Laurikkala, J. (2001). Improving identification of difficult small classes by balancing class distribution, Conference on Artificial Intelligence in Medicine in Europe, Cascais, Portugal, pp. 63–66.
[26] Lemaitre, G., Nogueira, F. and Aridas, C.K. (2017). Imbalanced-learn: A Python toolbox to tackle the curse of imbalanced datasets in machine learning, Journal of Machine Learning Research 18(17): 1–5.
[27] Liu, X.-Y., Wu, J. and Zhou, Z.-H. (2009). Exploratory undersampling for class-imbalance learning, IEEE Transactions on Systems, Man, and Cybernetics B: Cybernetics 39(2): 539–550.
[28] Liu, Y.-H. and Chen, Y.-T. (2005). Total margin based adaptive fuzzy support vector machines for multiview face recognition, 2005 IEEE International Conference on Systems, Man and Cybernetics, Waikoloa, HI, USA, Vol. 2, pp. 1704–1711.
[29] López, V., Fernández, A., García, S., Palade, V. and Herrera, F. (2013). An insight into classification with imbalanced data: Empirical results and current trends on using data intrinsic characteristics, Information Sciences 250: 113–141.
[30] Maciejewski, T. and Stefanowski, J. (2011). Local neighbourhood extension of SMOTE for mining imbalanced data, 2011 IEEE Symposium on Computational Intelligence and Data Mining (CIDM), Paris, France, pp. 104–111.
[31] Mazurowski, M.A., Habas, P.A., Zurada, J.M., Lo, J.Y., Baker, J.A. and Tourassi, G.D. (2008). Training neural network classifiers for medical decision making: The effects of imbalanced datasets on classification performance, Neural Networks 21(2): 427–436.
[32] Napierała, K. and Stefanowski, J. (2012). Identification of different types of minority class examples in imbalanced data, International Conference on Hybrid Artificial Intelligence Systems, Salamanca, Spain, pp. 139–150.
[33] Napierała, K., Stefanowski, J. and Wilk, S. (2010). Learning from imbalanced data in presence of noisy and borderline examples, International Conference on Rough Sets and Current Trends in Computing, Warsaw, Poland, pp. 158–167.
[34] Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R. and Dubourg, V. (2011). Scikit-learn: Machine learning in Python, Journal of Machine Learning Research 12(Oct): 2825–2830.
[35] Prati, R.C., Batista, G. and Monard, M.C. (2004). Class imbalances versus class overlapping: An analysis of a learning system behavior, Mexican International Conference on Artificial Intelligence, Mexico City, Mexico, pp. 312–321.
[36] Ramentol, E., Verbiest, N., Bello, R., Caballero, Y., Cornelis, C. and Herrera, F. (2012). SMOTE-FRST: A new resampling method using fuzzy rough set theory, 10th International FLINS Conference on Uncertainty Modelling in Knowledge Engineering and Decision Making, Istanbul, Turkey.
[37] Sáez, J. A., Galar, M., Luengo, J. and Herrera, F. (2013). Tackling the problem of classification with noisy data using multiple classifier systems: Analysis of the performance and robustness, Information Sciences 247: 1–20.
[38] Sanz, J.A., Bernardo, D., Herrera, F., Bustince, H. and Hagras, H. (2015). A compact evolutionary interval-valued fuzzy rule-based classification system for the modeling and prediction of real-world financial applications with imbalanced data, IEEE Transactions on Fuzzy Systems 23(4): 973–990.
[39] Stefanowski, J. (2016). Dealing with data difficulty factors while learning from imbalanced data, in S. Matwin and J. Mielniczuk (Eds.), Challenges in Computational Statistics and Data Mining, Springer, Heidelberg, pp. 333–363.
[40] Stefanowski, J. and Wilk, S. (2008). Selective pre-processing of imbalanced data for improving classification performance, International Conference on Data Warehousing and Knowledge Discovery, Turin, Italy, pp. 283–292.
[41] Sun, Y., Wong, A.K. and Kamel, M.S. (2009). Classification of imbalanced data: A review, International Journal of Pattern Recognition and Artificial Intelligence 23(04): 687–719.
[42] Tomek, I. (1976). Two modifications of CNN, IEEE Transactions on Systems, Man, and Cybernetics 6(11): 769–772.
[43] Triguero, I., del Río, S., López, V., Bacardit, J., Benítez, J.M. and Herrera, F. (2015). ROSEFW-RF: The winner algorithm for the ECBDL14 big data competition. An extremely imbalanced big data bioinformatics problem, Knowledge-Based Systems 87: 69–79.
[44] Van Hulse, J., Khoshgoftaar, T.M. and Napolitano, A. (2007). Skewed class distributions and mislabeled examples, 7th IEEE International Conference on Data Mining Workshops (ICDMW 2007), Omaha, NE, USA, pp. 477–482.
[45] Verbiest, N., Ramentol, E., Cornelis, C. and Herrera, F. (2014). Preprocessing noisy imbalanced datasets using SMOTE enhanced with fuzzy rough prototype selection, Applied Soft Computing 22: 511–517.
[46] Wang, S. and Yao, X. (2012). Multiclass imbalance problems: Analysis and potential solutions, IEEE Transactions on Systems, Man, and Cybernetics B: Cybernetics 42(4): 1119–1130.
[47] Wei, W., Li, J., Cao, L., Ou, Y. and Chen, J. (2013). Effective detection of sophisticated online banking fraud on extremely imbalanced data, World Wide Web 16(4): 449–475.
[48] Wilson, D.L. (1972). Asymptotic properties of nearest neighbor rules using edited data, IEEE Transactions on Systems, Man, and Cybernetics 2(3): 408–421.
[49] Yu, H., Ni, J. and Zhao, J. (2013). ACOSampling: An ant colony optimization-based undersampling method for classifying imbalanced DNA microarray data, Neurocomputing 101: 309–318.
[50] Zhang, H. and Li, M. (2014). RWO-sampling: A random walk over-sampling approach to imbalanced data classification, Information Fusion 20: 99–116.
[51] Zhang, Z., Krawczyk, B., García, S., Rosales-Pérez, A. and Herrera, F. (2016). Empowering one-vs-one decomposition with ensemble learning for multi-class imbalanced data, Knowledge-Based Systems 106: 251–263.