@comment{Names stored as "Last, First" so BibTeX parses the surname
unambiguously; fields aligned and ordered author/title/journal/volume/
pages/year; page range already uses the required double hyphen.}
@article{ZNSL_2023_530_a4,
  author   = {Kushchuk, D. and Ryndin, M.},
  title    = {Neuron coverage maximization for effective test set construction with respect to the model},
  journal  = {Zapiski Nauchnykh Seminarov POMI},
  volume   = {530},
  pages    = {51--67},
  year     = {2023},
  language = {en},
  url      = {http://geodesic.mathdoc.fr/item/ZNSL_2023_530_a4/},
}
TY - JOUR AU - D. Kushchuk AU - M. Ryndin TI - Neuron coverage maximization for effective test set construction with respect to the model JO - Zapiski Nauchnykh Seminarov POMI PY - 2023 SP - 51 EP - 67 VL - 530 UR - http://geodesic.mathdoc.fr/item/ZNSL_2023_530_a4/ LA - en ID - ZNSL_2023_530_a4 ER -
D. Kushchuk; M. Ryndin. Neuron coverage maximization for effective test set construction with respect to the model. Zapiski Nauchnykh Seminarov POMI, Investigations on applied mathematics and informatics. Part II–2, Tome 530 (2023), pp. 51-67. http://geodesic.mathdoc.fr/item/ZNSL_2023_530_a4/
[1] P. N. Bennett and V. R. Carvalho, “Online stratified sampling: evaluating classifiers at web-scale”, Proceedings of the 19th ACM international conference on Information and knowledge management, 2010, 1581–1584
[2] F. Harel-Canada, L. Wang, M. A. Gulzar, Q. Gu, and M. Kim, Is neuron coverage a meaningful measure for testing deep neural networks?, Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering, 2020, 851–862 | DOI
[3] D. Ji, R. L. Logan IV, P. Smyth, and M. Steyvers, Active Bayesian assessment for black-box classifiers, 2020, arXiv: 2002.06532
[4] J. Kossen, S. Farquhar, Y. Gal, and T. Rainforth, “Active testing: Sample-efficient model evaluation”, International Conference on Machine Learning, PMLR 2021, 5753–5763
[5] E. Lanus, L. J. Freeman, D. R. Kuhn, and R. N. Kacker, “Combinatorial testing metrics for machine learning”, 2021 IEEE International Conference on Software Testing, Verification and Validation Workshops (ICSTW), IEEE, 2021, 81–84 | DOI | MR
[6] N. Loukachevitch, P. Blinov, E. Kotelnikov, Y. Rubtsova, V. Ivanov, and E. Tutubalina, “SentiRuEval: Testing object-oriented sentiment analysis systems in Russian”, Proceedings of International Conference Dialog, v. 2, 2015, 3–13
[7] N. Loukachevitch and Y. V. Rubtsova, “SentiRuEval-2016: Overcoming time gap and data sparsity in tweet sentiment analysis”, Computational Linguistics and Intellectual Technologies, 2016, 416–426
[8] J. Lu, A. Liu, F. Dong, F. Gu, J. Gama, and G. Zhang, “Learning under concept drift: A review”, IEEE Transactions on Knowledge and Data Engineering, 31:12 (2018), 2346–2363
[9] L. Ma, F. Juefei-Xu, F. Zhang, J. Sun, M. Xue, B. Li, C. Chen, T. Su, L. Li, Y. Liu, et al., “DeepGauge: Multi-granularity testing criteria for deep learning systems”, Proceedings of the 33rd ACM/IEEE International Conference on Automated Software Engineering, 2018, 120–131
[10] S. Mani, A. Sankaran, S. Tamilselvam, and A. Sethi, Coverage testing of deep learning models using dataset characterization, 2019, arXiv: 1911.07309
[11] V. Mayorov, I. Andrianov, N. Astrakhantsev, V. Avanesov, I. Kozlov, and D. Turdakov, “A high precision method for aspect extraction in Russian”, Komp'juternaja Lingvistika i Intellektual'nye Tehnologii, 2015, 34–43
[12] A. Odena, C. Olsson, D. Andersen, and I. Goodfellow, “TensorFuzz: Debugging neural networks with coverage-guided fuzzing”, International Conference on Machine Learning, PMLR 2019, 4901–4911
[13] K. Pei, Y. Cao, J. Yang, and S. Jana, “DeepXplore: Automated whitebox testing of deep learning systems”, Proceedings of the 26th Symposium on Operating Systems Principles, 2017, 1–18 | MR | Zbl
[14] Y. Tian, K. Pei, S. Jana, and B. Ray, “DeepTest: Automated testing of deep-neural-network-driven autonomous cars”, Proceedings of the 40th international conference on software engineering, 2018, 303–314 | DOI | MR
[15] A. Tsymbal, “The problem of concept drift: Definitions and related work”, Computer Science Department, Trinity College Dublin, 106:2 (2004), 58
[16] Z. Yang, J. Shi, M. H. Asyrofi, and D. Lo, Revisiting neuron coverage metrics and quality of deep neural networks, 2022, arXiv: 2201.00191 | MR