Voir la notice de l'article provenant de la source Math-Net.Ru
% Journal-article record; the citation key matches the item id in the url field.
@article{MAIS_2024_31_2_a6,
  author    = {D. A. Morozov and I. A. Smal and T. A. Garipov and A. V. Glazkova},
  title     = {Keywords, morpheme parsing and syntactic trees: features for text complexity assessment},
  journal   = {Modelirovanie i analiz informacionnyh sistem},
  volume    = {31},
  number    = {2},
  pages     = {206--220},
  year      = {2024},
  publisher = {mathdoc},
  language  = {ru},
  url       = {http://geodesic.mathdoc.fr/item/MAIS_2024_31_2_a6/},
}
TY  - JOUR
AU  - D. A. Morozov
AU  - I. A. Smal
AU  - T. A. Garipov
AU  - A. V. Glazkova
TI  - Keywords, morpheme parsing and syntactic trees: features for text complexity assessment
JO  - Modelirovanie i analiz informacionnyh sistem
PY  - 2024
SP  - 206
EP  - 220
VL  - 31
IS  - 2
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/MAIS_2024_31_2_a6/
LA  - ru
ID  - MAIS_2024_31_2_a6
ER  -
%0 Journal Article
%A D. A. Morozov
%A I. A. Smal
%A T. A. Garipov
%A A. V. Glazkova
%T Keywords, morpheme parsing and syntactic trees: features for text complexity assessment
%J Modelirovanie i analiz informacionnyh sistem
%D 2024
%P 206-220
%V 31
%N 2
%I mathdoc
%U http://geodesic.mathdoc.fr/item/MAIS_2024_31_2_a6/
%G ru
%F MAIS_2024_31_2_a6
D. A. Morozov; I. A. Smal; T. A. Garipov; A. V. Glazkova. Keywords, morpheme parsing and syntactic trees: features for text complexity assessment. Modelirovanie i analiz informacionnyh sistem, Tome 31 (2024) no. 2, pp. 206-220. http://geodesic.mathdoc.fr/item/MAIS_2024_31_2_a6/
[1] R. Flesch, “A new readability yardstick”, Journal of Applied Psychology, 32:3 (1948), 221 | DOI
[2] E. Dale, J. S. Chall, “A formula for predicting readability: Instructions”, Educational Research Bulletin, 27 (1948), 37–54
[3] R. Senter, E. A. Smith, Automated readability index, AMRL TR, Tech. Rep. 5302480, 1967 | Zbl
[4] M. Solnyshkina, V. Ivanov, V. Solovyev, “Readability formula for Russian texts: A modified version”, Proceedings of the 17th Mexican International Conference on Artificial Intelligence, v. II, 2018, 132–145 | DOI
[5] A. Churunina, M. Solnyshkina, E. Gafiyatova, A. Zaikin, “Lexical features of text complexity: The case of Russian academic texts”, SHS Web of Conferences, 88:1 (2020), 01009 | DOI
[6] D. A. Morozov, A. V. Glazkova, B. L. Iomdin, “Text complexity and linguistic features: Their correlation in English and Russian”, Russian Journal of Linguistics, 26:2 (2022), 426–448 | DOI
[7] N. Karpov, J. Baranova, F. Vitugin, “Single-sentence readability prediction in Russian”, Analysis of Images, Social Networks and Texts, Springer International Publishing, Cham, 2014, 91–100 | DOI
[8] V. V. Ivanov, M. I. Solnyshkina, V. D. Solovyev, “Efficiency of text readability features in Russian academic texts”, Komp'juternaja Lingvistika I Intellektual'nye Tehnologii, 17 (2018), 267–283
[9] O. Blinova, N. Tarasov, “A hybrid model of complexity estimation: Evidence from Russian legal texts”, Frontiers in Artificial Intelligence, 5 (2022), 1008530 | DOI
[10] U. Isaeva, A. Sorokin, “Investigating the robustness of reading difficulty models for Russian educational texts”, Recent Trends in Analysis of Images, Social Networks and Texts, Springer International Publishing, Cham, 2021, 65–77 | DOI
[11] A. N. Laposhina, T. S. Veselovskaya, M. U. Lebedeva, O. F. Kupreshchenko, “Lexical analysis of the Russian language textbooks for primary school: Corpus study”, Komp'juternaja Lingvistika I Intellektual'nye Tehnologii, 18 (2019), 351–363
[12] V. Solovyev, V. Ivanov, M. Solnyshkina, “Readability formulas for three levels of Russian school textbooks”, Investigations on Applied Mathematics and Informatics, v. II-1, Zap. Nauchn. Sem. POMI, 529, 2023, 140–156
[13] A. N. Laposhina, M. Y. Lebedeva, A. A. Berlin Khenis, “Word frequency and text complexity: An eye-tracking study of young Russian readers”, Russian Journal of Linguistics, 26:2 (2022), 493–514 | DOI
[14] D. M. Blei, A. Y. Ng, M. I. Jordan, “Latent Dirichlet allocation”, The Journal of Machine Learning Research, 3 (2003), 993–1022 | Zbl
[15] A. Glazkova, Y. Egorov, M. Glazkov, “A comparative study of feature types for age-based text classification”, Analysis of Images, Social Networks and Texts, Springer International Publishing, Cham, 2021, 120–134 | DOI
[16] F. Pedregosa et al., “Scikit-learn: Machine learning in Python”, The Journal of Machine Learning Research, 12 (2011), 2825–2830 | MR | Zbl
[17] A. Kutuzov, E. Kuzmenko, “WebVectors: A toolkit for building web interfaces for vector semantic models”, Analysis of Images, Social Networks and Texts, Springer, 2017, 155–161 | DOI
[18] D. P. Kingma, J. Ba, Adam: A method for stochastic optimization, 2017, arXiv: 1412.6980 [cs.LG] | Zbl
[19] N. Reimers, I. Gurevych, “Making monolingual sentence embeddings multilingual using knowledge distillation”, Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), 2020, 4512–4525 | DOI
[20] N. Reimers, I. Gurevych, “Sentence-BERT: Sentence embeddings using siamese BERT-networks”, Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics, 2019 | DOI
[21] P. Qi, Y. Zhang, Y. Zhang, J. Bolton, C. D. Manning, “Stanza: A Python natural language processing toolkit for many human languages”, Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations, Association for Computational Linguistics, 2020, 101–108 | DOI
[22] M. Korobov, “Morphological analyzer and generator for Russian and Ukrainian languages”, International Conference on Analysis of Images, Social Networks and Texts, Springer, 2015, 320–332 | DOI
[23] E. Loper, S. Bird, “NLTK: The natural language toolkit”, Proceedings of the ACL-02 Workshop on Effective Tools and Methodologies for Teaching Natural Language Processing and Computational Linguistics, 2002, 63–70
[24] A. Glazkova, D. Morozov, M. Vorobeva, A. Stupnikov, “Keyphrase generation for the Russian-language scientific texts using mT5”, Modeling and Analysis of Information Systems, 30:4 (2023), 418–428 | DOI
[25] L. Xue et al., “mT5: A massively multilingual pre-trained text-to-text transformer”, Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021, 483–498 | DOI
[26] C. Raffel et al., “Exploring the limits of transfer learning with a unified text-to-text transformer”, The Journal of Machine Learning Research, 21:1 (2020), 5485–5551 | MR
[27] T. Wolf et al., “Transformers: State-of-the-art natural language processing”, Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, 2020, 38–45 | DOI
[28] O. Lyashevskaya, S. Sharov, Chastotnyj slovar' sovremennogo russkogo yazyka: na materialah Nacional'nogo korpusa russkogo yazyka, Azbukovnik, 2009 (in Russian)
[29] B. L. Iomdin, “How to define words with the same root?”, Russian Speech = Russkaya Rech', 1 (2019), 109–115 (in Russian) | DOI
[30] A. Sorokin, A. Kravtsova, “Deep convolutional networks for supervised morpheme segmentation of Russian language”, Artificial Intelligence and Natural Language, Springer International Publishing, Cham, 2018, 3–10 | DOI
[31] E. I. Bolshakova, A. S. Sapin, “Comparing models of morpheme analysis for Russian words based on machine learning”, Komp'juternaja Lingvistika I Intellektual'nye Tehnologii, 18 (2019), 104–113
[32] E. Bolshakova, A. Sapin, “Bi-LSTM model for morpheme segmentation of Russian words”, Artificial Intelligence and Natural Language, Springer International Publishing, Cham, 2019, 151–160 | DOI
[33] A. N. Tikhonov, Slovoobrazovatel'nyi slovar' russkogo yazyka, Russkiy yazyk, M., 1990 (in Russian)
[34] T. Garipov, D. Morozov, A. Glazkova, “Generalization ability of CNN-based Morpheme Segmentation”, 2023 Ivannikov Ispras Open Conference (ISPRAS), 2024, 58–62
[35] A. I. Kuznetsova, T. F. Efremova, Dictionary of Morphemes of the Russian Language, Firebird Publications, Incorporated, 1986, 1136 pp.
[36] T. M. Cover, J. A. Thomas, “Entropy, relative entropy, and mutual information”, Elements of Information Theory, ch. 2, John Wiley & Sons, Ltd, 2005, 13–55 | DOI | MR
[37] L. Breiman, J. Friedman, C. J. Stone, R. Olshen, Classification and Regression Trees, Chapman and Hall/CRC, 1984 | MR
[38] A. Altmann, L. Tolosi, O. Sander, T. Lengauer, “Permutation importance: A corrected feature importance measure”, Bioinformatics (Oxford, England), 26:10 (2010), 1340–1347 | DOI